diff --git a/data_scaling/n100_1/eval_v2/nsd_cococlip__patch__attn/eval_log_best.json b/data_scaling/n100_1/eval_v2/nsd_cococlip__patch__attn/eval_log_best.json new file mode 100644 index 0000000000000000000000000000000000000000..0fbdd77c4b43aeda4f648ea2565be985ffa4eb22 --- /dev/null +++ b/data_scaling/n100_1/eval_v2/nsd_cococlip__patch__attn/eval_log_best.json @@ -0,0 +1 @@ +{"eval/best/epoch": 6, "eval/best/id_best": 22, "eval/best/lr_best": 0.00021599999999999996, "eval/best/wd_best": 0.05, "eval/best/train/loss": 2.149545192718506, "eval/best/train/acc": 0.3522849503672516, "eval/best/train/acc_std": 0.002339245471254718, "eval/best/train/f1": 0.29712255139127103, "eval/best/train/f1_std": 0.002368208750693375, "eval/best/validation/loss": 2.4543910026550293, "eval/best/validation/acc": 0.2593207825765965, "eval/best/validation/acc_std": 0.0056060123391739455, "eval/best/validation/f1": 0.20393290461595417, "eval/best/validation/f1_std": 0.004958709413239012, "eval/best/test/loss": 2.3971211910247803, "eval/best/test/acc": 0.2717996289424861, "eval/best/test/acc_std": 0.005388663388073568, "eval/best/test/f1": 0.20969777817050883, "eval/best/test/f1_std": 0.005083817199895967, "eval/best/testid/loss": 2.3186991214752197, "eval/best/testid/acc": 0.29053402737613265, "eval/best/testid/acc_std": 0.005916682826613068, "eval/best/testid/f1": 0.2350341574968312, "eval/best/testid/f1_std": 0.00553450952962364} diff --git a/data_scaling/n100_2/eval_v2/adhd200_dx__patch__logistic/log.txt b/data_scaling/n100_2/eval_v2/adhd200_dx__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..0e6fc839071588615a23dd97d8b289d075e110d6 --- /dev/null +++ b/data_scaling/n100_2/eval_v2/adhd200_dx__patch__logistic/log.txt @@ -0,0 +1,241 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: 58906bf7243fb545e1349221e6921a1797e2e666, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-26 17:21:07 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n100_2; eval v2 (adhd200_dx patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n100_2/eval_v2/adhd200_dx__patch__logistic +model: flat_mae +representation: patch +dataset: adhd200_dx +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n100_2/eval_v2/adhd200_dx__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: adhd200_dx (flat) +train (n=301): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'gender', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 301 +}), + labels=['ADHD' 'Control'], + counts=[131 170] +) + +validation (n=64): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'gender', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 64 +}), + labels=['ADHD' 'Control'], + counts=[28 36] +) + +test (n=65): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'gender', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 65 +}), + labels=['ADHD' 'Control'], + counts=[28 37] +) + +extracting features for all splits +extract (train) [ 0/151] eta: 0:10:55 time: 4.3443 data: 3.2775 max mem: 2698 +extract (train) [ 20/151] eta: 0:00:49 time: 0.1785 data: 0.0587 max mem: 2851 +extract (train) [ 40/151] eta: 0:00:31 time: 0.1801 data: 0.0577 max mem: 2851 +extract (train) [ 60/151] eta: 0:00:22 time: 0.1635 data: 0.0539 max mem: 2851 +extract (train) [ 80/151] eta: 0:00:15 time: 0.1632 data: 0.0512 max mem: 2851 +extract (train) [100/151] eta: 0:00:10 time: 0.1721 data: 0.0563 max mem: 2851 +extract (train) [120/151] eta: 0:00:06 time: 0.1678 data: 0.0540 max mem: 2851 +extract (train) [140/151] eta: 0:00:02 time: 0.1508 data: 0.0464 max mem: 2851 +extract (train) [150/151] eta: 0:00:00 time: 0.1396 data: 0.0413 max mem: 2851 +extract (train) Total time: 0:00:29 (0.1955 s / it) +extract (validation) [ 0/32] eta: 0:01:41 time: 3.1854 data: 3.0519 max mem: 2851 +extract (validation) [20/32] eta: 0:00:04 time: 0.2043 data: 0.0751 max mem: 2851 +extract (validation) [31/32] eta: 0:00:00 time: 0.1554 data: 0.0482 max mem: 2851 +extract (validation) Total time: 0:00:09 (0.2875 s / it) +extract (test) [ 0/33] eta: 0:01:58 time: 3.5812 data: 3.3825 max mem: 2851 +extract (test) [20/33] eta: 0:00:04 time: 0.1732 data: 0.0563 max mem: 2851 +extract (test) [32/33] eta: 0:00:00 time: 0.1371 data: 0.0376 max mem: 2851 +extract (test) Total time: 0:00:08 (0.2725 s / it) +feature extraction time: 0:00:47 +train features: (301, 768) +validation features: (64, 768) +test features: (65, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:-----------|:--------|----------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | adhd200_dx | | 0.0059948 | train | 0.73699 | 0.022243 | 0.72603 | 0.023651 | 0.72322 | 0.023123 | +| flat_mae | patch | logistic | adhd200_dx | | 0.0059948 | test | 0.6 | 0.057698 | 0.57063 | 0.063737 | 0.57481 | 0.059405 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 1, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.059122930803078115, "f1": 0.6515594541910331, "f1_std": 0.061600951342251554, "bacc": 0.6505791505791505, "bacc_std": 0.06073906918317193} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 2, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05720041378589937, "f1": 0.6289401836684041, "f1_std": 0.06114509713123287, "bacc": 0.6283783783783784, "bacc_std": 0.0590464400633895} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 3, "C": 0.005994842503189409, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.05825007098047837, "f1": 0.5321419707123356, "f1_std": 0.062020678710289406, "bacc": 0.5342664092664092, "bacc_std": 0.0594503654731437} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 4, "C": 0.000774263682681127, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05740152359687505, "f1": 0.61, "f1_std": 0.06252870715084341, "bacc": 0.6105212355212355, "bacc_std": 0.05948094634923437} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 5, "C": 0.005994842503189409, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.059869491198200106, "f1": 0.5294401544401545, "f1_std": 0.06137770864406378, "bacc": 0.5294401544401545, "bacc_std": 0.06129658833442081} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 6, "C": 0.046415888336127774, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05803295533854401, "f1": 0.6407113674597452, "f1_std": 0.059151268573164356, "bacc": 0.6414092664092663, "bacc_std": 0.058949435053899574} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 7, "C": 0.046415888336127774, "split": "test", "acc": 0.6, "acc_std": 0.05758279657622111, "f1": 0.5921814671814671, "f1_std": 0.05832330670934808, "bacc": 0.5921814671814671, "bacc_std": 0.057999724558730946} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 8, "C": 0.046415888336127774, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05972668320265686, "f1": 0.6018132810585641, "f1_std": 0.06274203372096271, "bacc": 0.6013513513513513, "bacc_std": 0.0613568925163966} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 9, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.0589670372266569, "f1": 0.6474358974358974, "f1_std": 0.061831955306588915, "bacc": 0.6462355212355213, "bacc_std": 0.060170577715491} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 10, "C": 0.000774263682681127, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.0565725172158886, "f1": 0.5905769715293525, "f1_std": 0.06206736620172798, "bacc": 0.5926640926640927, "bacc_std": 0.058373890387370814} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 11, "C": 0.046415888336127774, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.05254170279448868, "f1": 0.6474358974358974, "f1_std": 0.05584366223489811, "bacc": 0.6462355212355213, "bacc_std": 0.054196783526384434} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 12, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.0609104182653053, "f1": 0.545, "f1_std": 0.0647101820301012, "bacc": 0.5477799227799228, "bacc_std": 0.06180624994629174} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 13, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.06217738320521796, "f1": 0.5512820512820513, "f1_std": 0.06433910949899092, "bacc": 0.5521235521235521, "bacc_std": 0.06259531700295555} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 14, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.052824335435490186, "f1": 0.5834401435529352, "f1_std": 0.05850498151483573, "bacc": 0.5883204633204633, "bacc_std": 0.054252070047106755} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 15, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.06194102176679678, "f1": 0.5976190476190476, "f1_std": 0.062467209301858354, "bacc": 0.6008687258687259, "bacc_std": 0.06283432235034539} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 16, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.0620971866650654, "f1": 0.5608108108108107, "f1_std": 0.06269961058095246, "bacc": 0.5608108108108107, "bacc_std": 0.06256609131331416} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 17, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.0547049839253444, "f1": 0.5962732919254659, "f1_std": 0.06323173847167696, "bacc": 0.6018339768339769, "bacc_std": 0.057167468155706364} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 18, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.055422209463130825, "f1": 0.6153846153846154, "f1_std": 0.05868127582590679, "bacc": 0.6148648648648649, "bacc_std": 0.0570418421950532} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 19, "C": 0.046415888336127774, "split": "test", "acc": 0.7076923076923077, "acc_std": 0.05622018420839651, "f1": 0.7031963470319634, "f1_std": 0.05722150076622555, "bacc": 0.7041505791505791, "bacc_std": 0.057113674287631956} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 20, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.054800025914562185, "f1": 0.6233308138070043, "f1_std": 0.060245812857813906, "bacc": 0.6240347490347491, "bacc_std": 0.056897537985958695} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 21, "C": 0.005994842503189409, "split": "test", "acc": 0.7538461538461538, "acc_std": 0.052685010162834625, "f1": 0.746588693957115, "f1_std": 0.05488980693547636, "bacc": 0.7446911196911197, "bacc_std": 0.05451018080389148} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 22, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.0589707057954357, "f1": 0.6375757575757576, "f1_std": 0.06072792769693355, "bacc": 0.6370656370656371, "bacc_std": 0.06038504559797015} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 23, "C": 0.3593813663804626, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.06275302203996178, "f1": 0.564176245210728, "f1_std": 0.06359271470801398, "bacc": 0.5651544401544402, "bacc_std": 0.06417945317634577} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 24, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.055343266616304045, "f1": 0.5834401435529352, "f1_std": 0.06206076630217447, "bacc": 0.5883204633204633, "bacc_std": 0.057327277757238136} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 25, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05711070946936901, "f1": 0.6036585365853658, "f1_std": 0.06310534555623716, "bacc": 0.6061776061776062, "bacc_std": 0.05881955612360381} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 26, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.057707682054015455, "f1": 0.6233308138070043, "f1_std": 0.06348168714400226, "bacc": 0.6240347490347491, "bacc_std": 0.06000536097182148} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 27, "C": 0.3593813663804626, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05810544180078063, "f1": 0.5512820512820513, "f1_std": 0.06080607441553064, "bacc": 0.5521235521235521, "bacc_std": 0.058997058843763446} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 28, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05251631699096823, "f1": 0.6167649320687003, "f1_std": 0.0597024052397967, "bacc": 0.6196911196911197, "bacc_std": 0.05489544667144013} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 29, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.06155700489847606, "f1": 0.5565302144249512, "f1_std": 0.06368651816187636, "bacc": 0.5564671814671815, "bacc_std": 0.06290229548602254} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 30, "C": 0.000774263682681127, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.06290495131075362, "f1": 0.5565302144249512, "f1_std": 0.06497695177551632, "bacc": 0.5564671814671815, "bacc_std": 0.0641388109269871} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 31, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.05967228847492209, "f1": 0.5699583435432491, "f1_std": 0.06237835979579026, "bacc": 0.5699806949806949, "bacc_std": 0.06088549532232527} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 32, "C": 0.046415888336127774, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.06104549472348883, "f1": 0.5666666666666667, "f1_std": 0.06115848645631313, "bacc": 0.5694980694980695, "bacc_std": 0.06153708660106956} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 33, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.05995130963229426, "f1": 0.5921814671814671, "f1_std": 0.06127011545322799, "bacc": 0.5921814671814671, "bacc_std": 0.06101408706474319} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 34, "C": 0.046415888336127774, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.06195471528109971, "f1": 0.5565302144249512, "f1_std": 0.0637996261296814, "bacc": 0.5564671814671815, "bacc_std": 0.06284883969040381} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 35, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05549134161365033, "f1": 0.6153846153846154, "f1_std": 0.05956325767040712, "bacc": 0.6148648648648649, "bacc_std": 0.057557275784012464} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 36, "C": 2.782559402207126, "split": "test", "acc": 0.6, "acc_std": 0.0620975449495179, "f1": 0.5921814671814671, "f1_std": 0.06296931327152987, "bacc": 0.5921814671814671, "bacc_std": 0.06253105973065382} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 37, "C": 0.005994842503189409, "split": "test", "acc": 0.7230769230769231, "acc_std": 0.05140626904416834, "f1": 0.6972049689440993, "f1_std": 0.06070343556729061, "bacc": 0.6959459459459459, "bacc_std": 0.05514814137174209} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 38, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.05893640339357683, "f1": 0.5775, "f1_std": 0.06319632370194196, "bacc": 0.5791505791505791, "bacc_std": 0.06003937140263874} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 39, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.059831234050847865, "f1": 0.61, "f1_std": 0.06402583167069614, "bacc": 0.6105212355212355, "bacc_std": 0.06119581594698507} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 40, "C": 0.005994842503189409, "split": "test", "acc": 0.7384615384615385, "acc_std": 0.05348250833894061, "f1": 0.7292330311198236, "f1_std": 0.056235613019430704, "bacc": 0.7268339768339769, "bacc_std": 0.05541515098494524} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 41, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.05848235257007175, "f1": 0.6575670498084292, "f1_std": 0.05883378806746763, "bacc": 0.6592664092664093, "bacc_std": 0.05855560614118736} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 42, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.059302581841077985, "f1": 0.6515594541910331, "f1_std": 0.061318546631065264, "bacc": 0.6505791505791505, "bacc_std": 0.060531280533197264} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 43, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.0585299276100313, "f1": 0.5775, "f1_std": 0.06247758071783749, "bacc": 0.5791505791505791, "bacc_std": 0.05965248168677731} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 44, "C": 1291.5496650148827, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.06032159180484069, "f1": 0.6575670498084292, "f1_std": 0.060875858690351115, "bacc": 0.6592664092664093, "bacc_std": 0.06080087572056158} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 45, "C": 0.046415888336127774, "split": "test", "acc": 0.6, "acc_std": 0.05642791704896794, "f1": 0.5775, "f1_std": 0.060774494514319, "bacc": 0.5791505791505791, "bacc_std": 0.05779987177181666} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 46, "C": 0.046415888336127774, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.06256827560694025, "f1": 0.5745454545454545, "f1_std": 0.06368693766884269, "bacc": 0.5743243243243243, "bacc_std": 0.06321664518489406} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 47, "C": 0.046415888336127774, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.06152672965092864, "f1": 0.5500119360229172, "f1_std": 0.061872804653438834, "bacc": 0.5516409266409266, "bacc_std": 0.062165415295041875} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 48, "C": 0.046415888336127774, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.05884424531847267, "f1": 0.6549227799227799, "f1_std": 0.05996019039309655, "bacc": 0.6549227799227799, "bacc_std": 0.059771924620375914} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 49, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.06091433503714665, "f1": 0.578226387887527, "f1_std": 0.06201673472856258, "bacc": 0.5786679536679536, "bacc_std": 0.06221860279149141} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 50, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.05650990173485387, "f1": 0.570630081300813, "f1_std": 0.061919741186618846, "bacc": 0.5748069498069498, "bacc_std": 0.057854780214648176} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 51, "C": 0.046415888336127774, "split": "test", "acc": 0.5230769230769231, "acc_std": 0.05826035017374048, "f1": 0.5115151515151515, "f1_std": 0.059569542782976, "bacc": 0.5115830115830116, "bacc_std": 0.059358888018544026} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 52, "C": 0.046415888336127774, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.055671827938693574, "f1": 0.5501153550371699, "f1_std": 0.06294531714596899, "bacc": 0.556949806949807, "bacc_std": 0.05748947780982853} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 53, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.049741304732080605, "f1": 0.6003742314889067, "f1_std": 0.06065371614417824, "bacc": 0.6110038610038611, "bacc_std": 0.0524573417539937} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 54, "C": 0.000774263682681127, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.06022131962418381, "f1": 0.545, "f1_std": 0.06480618023797244, "bacc": 0.5477799227799228, "bacc_std": 0.06151680349271781} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 55, "C": 0.000774263682681127, "split": "test", "acc": 0.5230769230769231, "acc_std": 0.060481844309877485, "f1": 0.5115151515151515, "f1_std": 0.06219835497319645, "bacc": 0.5115830115830116, "bacc_std": 0.06141254862999763} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 56, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.0581879916036282, "f1": 0.6153846153846154, "f1_std": 0.06024653377601955, "bacc": 0.6148648648648649, "bacc_std": 0.05888822577163781} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 57, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.053528513664662214, "f1": 0.6036585365853658, "f1_std": 0.05935744950938239, "bacc": 0.6061776061776062, "bacc_std": 0.055303033535392815} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 58, "C": 0.046415888336127774, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05294141900396934, "f1": 0.5905769715293525, "f1_std": 0.05840542878762152, "bacc": 0.5926640926640927, "bacc_std": 0.05471812488046308} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 59, "C": 0.005994842503189409, "split": "test", "acc": 0.676923076923077, "acc_std": 0.05583916040127688, "f1": 0.6655231560891939, "f1_std": 0.05874122242498554, "bacc": 0.6640926640926641, "bacc_std": 0.057716956074708566} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 60, "C": 0.046415888336127774, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.056803125174463766, "f1": 0.6094688776736361, "f1_std": 0.05775627186179284, "bacc": 0.61003861003861, "bacc_std": 0.05758859389840303} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 61, "C": 0.046415888336127774, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.05991628084301605, "f1": 0.5745454545454545, "f1_std": 0.06175460412692856, "bacc": 0.5743243243243243, "bacc_std": 0.06133289138415185} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 62, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.0553027086660212, "f1": 0.6198830409356726, "f1_std": 0.05735647673238008, "bacc": 0.6192084942084942, "bacc_std": 0.056587189004356254} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 63, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.058059271897384634, "f1": 0.5745454545454545, "f1_std": 0.06002922075689464, "bacc": 0.5743243243243243, "bacc_std": 0.05954280671839976} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 64, "C": 0.000774263682681127, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.05556465608565539, "f1": 0.6425000000000001, "f1_std": 0.05972067738941332, "bacc": 0.6418918918918919, "bacc_std": 0.05702140139337761} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 65, "C": 0.005994842503189409, "split": "test", "acc": 0.7230769230769231, "acc_std": 0.05441635674736784, "f1": 0.7115384615384616, "f1_std": 0.058145537945652526, "bacc": 0.708976833976834, "bacc_std": 0.0567151507142303} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 66, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05923417372833107, "f1": 0.6375757575757576, "f1_std": 0.06037800601765378, "bacc": 0.6370656370656371, "bacc_std": 0.059808980725304876} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 67, "C": 0.046415888336127774, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.053146888111876615, "f1": 0.5411764705882354, "f1_std": 0.06229728210453956, "bacc": 0.5526061776061776, "bacc_std": 0.055110476271354014} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 68, "C": 21.54434690031882, "split": "test", "acc": 0.6, "acc_std": 0.06046192992631149, "f1": 0.5775, "f1_std": 0.0646583395766756, "bacc": 0.5791505791505791, "bacc_std": 0.061840377837319584} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 69, "C": 0.046415888336127774, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.0622122802238504, "f1": 0.5250692869740489, "f1_std": 0.06719582898180877, "bacc": 0.5299227799227799, "bacc_std": 0.06327566349871941} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 70, "C": 0.005994842503189409, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.061739203353859615, "f1": 0.5248538011695907, "f1_std": 0.06376554465089636, "bacc": 0.525096525096525, "bacc_std": 0.06307888907461902} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 71, "C": 0.3593813663804626, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.061101029925756285, "f1": 0.6094688776736361, "f1_std": 0.06251779239719527, "bacc": 0.61003861003861, "bacc_std": 0.062434006840175735} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 72, "C": 0.005994842503189409, "split": "test", "acc": 0.6923076923076923, "acc_std": 0.05700683235289646, "f1": 0.6794871794871795, "f1_std": 0.060321565900486734, "bacc": 0.6776061776061776, "bacc_std": 0.058686972056237144} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 73, "C": 0.000774263682681127, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.061547776218125454, "f1": 0.5381034060279344, "f1_std": 0.06397983576097896, "bacc": 0.5386100386100386, "bacc_std": 0.06281982020214182} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 74, "C": 0.3593813663804626, "split": "test", "acc": 0.6, "acc_std": 0.06186531658732301, "f1": 0.599146110056926, "f1_std": 0.061851687790565685, "bacc": 0.6052123552123552, "bacc_std": 0.061612212579144975} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 75, "C": 0.005994842503189409, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.05968948446931852, "f1": 0.543030303030303, "f1_std": 0.06094808378282227, "bacc": 0.542953667953668, "bacc_std": 0.06035233409338714} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 76, "C": 0.005994842503189409, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.04860023132874061, "f1": 0.4724025974025974, "f1_std": 0.05639163431280367, "bacc": 0.49903474903474904, "bacc_std": 0.04879020814300638} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 77, "C": 0.005994842503189409, "split": "test", "acc": 0.7384615384615385, "acc_std": 0.05107632993168108, "f1": 0.7257383966244726, "f1_std": 0.05543660181438532, "bacc": 0.7224903474903475, "bacc_std": 0.05356940476715722} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 78, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.05598203431173677, "f1": 0.5626293995859213, "f1_std": 0.0634164721573166, "bacc": 0.5704633204633205, "bacc_std": 0.05757611787628695} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 79, "C": 0.046415888336127774, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06311076765562941, "f1": 0.5330459770114943, "f1_std": 0.06356327982182053, "bacc": 0.5337837837837838, "bacc_std": 0.06365158403298632} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 80, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.06003877248231705, "f1": 0.5578231292517006, "f1_std": 0.06551606041388, "bacc": 0.5612934362934363, "bacc_std": 0.061829995007869655} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 81, "C": 0.3593813663804626, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06163788890862847, "f1": 0.5330459770114943, "f1_std": 0.06200614815994654, "bacc": 0.5337837837837838, "bacc_std": 0.06198603471017203} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 82, "C": 0.005994842503189409, "split": "test", "acc": 0.7230769230769231, "acc_std": 0.05405673110274791, "f1": 0.7198275862068966, "f1_std": 0.05493725813150368, "bacc": 0.722007722007722, "bacc_std": 0.05494035957128832} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 83, "C": 0.005994842503189409, "split": "test", "acc": 0.7692307692307693, "acc_std": 0.053206966665796204, "f1": 0.7636363636363637, "f1_std": 0.05483435928383949, "bacc": 0.7625482625482626, "bacc_std": 0.05474843785412051} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 84, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.060547990648721225, "f1": 0.6198830409356726, "f1_std": 0.06292906052095694, "bacc": 0.6192084942084942, "bacc_std": 0.06168062786419753} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 85, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05924568044075091, "f1": 0.6198830409356726, "f1_std": 0.061448234196436344, "bacc": 0.6192084942084942, "bacc_std": 0.060477112509962586} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 86, "C": 0.046415888336127774, "split": "test", "acc": 0.5076923076923077, "acc_std": 0.059987367506459464, "f1": 0.4715447154471545, "f1_std": 0.06314298713827904, "bacc": 0.4806949806949807, "bacc_std": 0.059988042305611326} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 87, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.05550393983563785, "f1": 0.6366869918699187, "f1_std": 0.061981169394156384, "bacc": 0.6375482625482626, "bacc_std": 0.057996017247218846} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 88, "C": 0.3593813663804626, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.058685917587415495, "f1": 0.6018132810585641, "f1_std": 0.06142977436334669, "bacc": 0.6013513513513513, "bacc_std": 0.060250911405957834} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 89, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05982715933489438, "f1": 0.5512820512820513, "f1_std": 0.062375614829810154, "bacc": 0.5521235521235521, "bacc_std": 0.060586015306990676} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 90, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.05589826769552833, "f1": 0.5501153550371699, "f1_std": 0.06170522424712748, "bacc": 0.556949806949807, "bacc_std": 0.05692418449684527} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 91, "C": 0.046415888336127774, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.05823473413998753, "f1": 0.5250692869740489, "f1_std": 0.06245076740475483, "bacc": 0.5299227799227799, "bacc_std": 0.059145661797415386} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 92, "C": 0.005994842503189409, "split": "test", "acc": 0.7076923076923077, "acc_std": 0.051588393722463446, "f1": 0.677124183006536, "f1_std": 0.06112912497719481, "bacc": 0.678088803088803, "bacc_std": 0.05489723214261481} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 93, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.046770232782788385, "f1": 0.5902987119758838, "f1_std": 0.062023130297110325, "bacc": 0.6066602316602316, "bacc_std": 0.050252041913286805} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 94, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05382860373334661, "f1": 0.5289855072463768, "f1_std": 0.06044560551466636, "bacc": 0.5390926640926641, "bacc_std": 0.05500088311792442} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 95, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.055887460883669325, "f1": 0.570630081300813, "f1_std": 0.06216203424157956, "bacc": 0.5748069498069498, "bacc_std": 0.05778176669267205} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 96, "C": 0.046415888336127774, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.0573820663488301, "f1": 0.6575670498084292, "f1_std": 0.05796864738982916, "bacc": 0.6592664092664093, "bacc_std": 0.057926941380571306} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 97, "C": 0.046415888336127774, "split": "test", "acc": 0.6, "acc_std": 0.057039581398830815, "f1": 0.5626293995859213, "f1_std": 0.06574931455018576, "bacc": 0.5704633204633205, "bacc_std": 0.05935925275804624} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 98, "C": 0.000774263682681127, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.0558516632162702, "f1": 0.6366869918699187, "f1_std": 0.06191037576540796, "bacc": 0.6375482625482626, "bacc_std": 0.05794528698701383} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 99, "C": 0.046415888336127774, "split": "test", "acc": 0.49230769230769234, "acc_std": 0.06096095170792607, "f1": 0.48000000000000004, "f1_std": 0.0613940233805348, "bacc": 0.48021235521235517, "bacc_std": 0.061038339700273066} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 100, "C": 21.54434690031882, "split": "test", "acc": 0.4307692307692308, "acc_std": 0.055858519504041915, "f1": 0.4106836559666748, "f1_std": 0.056675403332023526, "bacc": 0.41312741312741313, "bacc_std": 0.05592474733507681} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:-----------|:--------|-----------:|------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | adhd200_dx | train | 100 | 13.41 | 129.14 | 0.79447 | 0.084271 | 0.78581 | 0.088926 | 0.78314 | 0.089147 | +| flat_mae | patch | logistic | adhd200_dx | test | 100 | 13.41 | 129.14 | 0.61292 | 0.058254 | 0.59506 | 0.061189 | 0.59693 | 0.059548 | + + +done! total time: 0:04:35 diff --git a/data_scaling/n100_2/eval_v2/adni_ad_vs_cn__patch__logistic/config.yaml b/data_scaling/n100_2/eval_v2/adni_ad_vs_cn__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7ed5a9e4f275bc9c92070731d1fb97680367497b --- /dev/null +++ b/data_scaling/n100_2/eval_v2/adni_ad_vs_cn__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n100_2; eval v2 (adni_ad_vs_cn patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n100_2/eval_v2/adni_ad_vs_cn__patch__logistic +model: flat_mae +representation: patch +dataset: adni_ad_vs_cn +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n100_2/eval_v2/adni_ad_vs_cn__patch__logistic +remote_dir: null diff --git a/data_scaling/n100_2/eval_v2/adni_ad_vs_cn__patch__logistic/eval_table.csv b/data_scaling/n100_2/eval_v2/adni_ad_vs_cn__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..b14022540618dfe16e03e9996f32012699a9aa05 --- /dev/null +++ b/data_scaling/n100_2/eval_v2/adni_ad_vs_cn__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,adni_ad_vs_cn,,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,,2.782559402207126,test,0.7804878048780488,0.06072673111163696,0.6660633484162897,0.09192281949849279,0.6597222222222222,0.09019382287495847 +flat_mae,patch,logistic,adni_ad_vs_cn,1,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,1,21.54434690031882,test,0.7317073170731707,0.06334575997148711,0.6232247284878863,0.08793672358275198,0.6193548387096774,0.08598658124385597 +flat_mae,patch,logistic,adni_ad_vs_cn,2,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,2,166.81005372000556,test,0.7560975609756098,0.05729319689518083,0.6440972222222222,0.08939344958909794,0.635483870967742,0.08318923577609934 +flat_mae,patch,logistic,adni_ad_vs_cn,3,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,3,166.81005372000556,test,0.7073170731707317,0.0642529208256111,0.603225806451613,0.08545209402104924,0.603225806451613,0.08584654768981771 +flat_mae,patch,logistic,adni_ad_vs_cn,4,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,4,166.81005372000556,test,0.7804878048780488,0.06269004420065305,0.6917293233082706,0.0874938582730628,0.685483870967742,0.08655773722232406 +flat_mae,patch,logistic,adni_ad_vs_cn,5,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,5,2.782559402207126,test,0.6829268292682927,0.06275453819643223,0.5547201336675021,0.08491336852780806,0.5532258064516129,0.08101260002717708 +flat_mae,patch,logistic,adni_ad_vs_cn,6,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,6,2.782559402207126,test,0.7317073170731707,0.057780050240804244,0.5918552036199095,0.0876345941865284,0.5854838709677419,0.0778780013259887 +flat_mae,patch,logistic,adni_ad_vs_cn,7,0.3593813663804626,train,0.991869918699187,0.004915875618304956,0.9884880564885973,0.0070827638860623265,0.9825581395348837,0.010546268041596151 +flat_mae,patch,logistic,adni_ad_vs_cn,7,0.3593813663804626,test,0.7073170731707317,0.04773242393868619,0.4831932773109243,0.07144347037645617,0.5016129032258064,0.05543197272708444 +flat_mae,patch,logistic,adni_ad_vs_cn,8,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,8,1291.5496650148827,test,0.6829268292682927,0.06260260584176712,0.5547201336675021,0.0806557428489314,0.5532258064516129,0.07738504959037022 +flat_mae,patch,logistic,adni_ad_vs_cn,9,0.3593813663804626,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,9,0.3593813663804626,test,0.7317073170731707,0.06424477285185493,0.6232247284878863,0.0876379328977669,0.6193548387096774,0.08531574916245467 +flat_mae,patch,logistic,adni_ad_vs_cn,10,0.046415888336127774,train,0.8834688346883469,0.013979079494849755,0.8072919829481278,0.027287605424020506,0.7661886761443011,0.027738341867105776 +flat_mae,patch,logistic,adni_ad_vs_cn,10,0.046415888336127774,test,0.7560975609756098,0.05224595199914958,0.6117424242424243,0.08828437133556405,0.6016129032258064,0.07641454552603223 +flat_mae,patch,logistic,adni_ad_vs_cn,11,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,11,166.81005372000556,test,0.7560975609756098,0.06143119558173057,0.6693548387096775,0.08116957425182983,0.6693548387096775,0.0828933280774168 +flat_mae,patch,logistic,adni_ad_vs_cn,12,0.3593813663804626,train,0.994579945799458,0.004212114489896403,0.9923570836785418,0.006023515019202035,0.9883720930232558,0.009036454923091743 +flat_mae,patch,logistic,adni_ad_vs_cn,12,0.3593813663804626,test,0.7560975609756098,0.053071646319570696,0.6117424242424243,0.08833637303289316,0.6016129032258064,0.07667909391958687 +flat_mae,patch,logistic,adni_ad_vs_cn,13,0.3593813663804626,train,0.994579945799458,0.0037371617623223187,0.9923570836785418,0.0053327140117987535,0.9883720930232558,0.008017515641261283 +flat_mae,patch,logistic,adni_ad_vs_cn,13,0.3593813663804626,test,0.7073170731707317,0.06553910422056822,0.603225806451613,0.08612420304963354,0.603225806451613,0.08705834875190863 +flat_mae,patch,logistic,adni_ad_vs_cn,14,0.3593813663804626,train,0.991869918699187,0.00476793514463414,0.9884880564885973,0.006871187625581639,0.9825581395348837,0.010228884118430273 +flat_mae,patch,logistic,adni_ad_vs_cn,14,0.3593813663804626,test,0.8048780487804879,0.05011945278919316,0.6893939393939394,0.09085930439083088,0.667741935483871,0.08125269764192135 +flat_mae,patch,logistic,adni_ad_vs_cn,15,0.3593813663804626,train,0.989159891598916,0.005253819324033648,0.9845864661654136,0.007620386290350128,0.9767441860465116,0.011271275177723394 +flat_mae,patch,logistic,adni_ad_vs_cn,15,0.3593813663804626,test,0.7560975609756098,0.06144877874522968,0.6693548387096775,0.08285053772311125,0.6693548387096775,0.08349706357345876 +flat_mae,patch,logistic,adni_ad_vs_cn,16,0.046415888336127774,train,0.8943089430894309,0.013342005904459428,0.82903881107666,0.02488220586578705,0.7894444900977895,0.026078776228770594 +flat_mae,patch,logistic,adni_ad_vs_cn,16,0.046415888336127774,test,0.7804878048780488,0.038199077762942416,0.5886287625418061,0.08830918359080019,0.5838709677419355,0.06300486833509769 +flat_mae,patch,logistic,adni_ad_vs_cn,17,0.005994842503189409,train,0.8401084010840109,0.012457492003329618,0.7003592220983526,0.03168373356809912,0.665071082258197,0.02542770843253441 +flat_mae,patch,logistic,adni_ad_vs_cn,17,0.005994842503189409,test,0.7560975609756098,0.032299129855323745,0.5119047619047619,0.07636737572965696,0.5338709677419355,0.04939062417608875 +flat_mae,patch,logistic,adni_ad_vs_cn,18,0.046415888336127774,train,0.9024390243902439,0.012524284032555507,0.8413282369804109,0.023876400991301686,0.7987920124907552,0.025762108769589422 +flat_mae,patch,logistic,adni_ad_vs_cn,18,0.046415888336127774,test,0.7804878048780488,0.05160161769019545,0.6328358208955224,0.09780049056868827,0.6177419354838709,0.07914255134832966 +flat_mae,patch,logistic,adni_ad_vs_cn,19,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,19,2.782559402207126,test,0.7317073170731707,0.06122856798243978,0.6232247284878863,0.08474170587545705,0.6193548387096774,0.08302419967412908 +flat_mae,patch,logistic,adni_ad_vs_cn,20,0.046415888336127774,train,0.8997289972899729,0.013198565390327354,0.8360216668067884,0.025322764961396772,0.7929780590023832,0.02681242023373551 +flat_mae,patch,logistic,adni_ad_vs_cn,20,0.046415888336127774,test,0.8536585365853658,0.05453789430777899,0.8136363636363637,0.06643653684392899,0.8354838709677419,0.06950050262209798 +flat_mae,patch,logistic,adni_ad_vs_cn,21,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,21,21.54434690031882,test,0.7317073170731707,0.05776112371010982,0.5918552036199095,0.08747961129875054,0.5854838709677419,0.07808246094736164 +flat_mae,patch,logistic,adni_ad_vs_cn,22,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,22,2.782559402207126,test,0.6829268292682927,0.06435209512930587,0.5547201336675021,0.08474631590071453,0.5532258064516129,0.08136621361475586 +flat_mae,patch,logistic,adni_ad_vs_cn,23,0.3593813663804626,train,0.997289972899729,0.002533108589409639,0.9961941891766453,0.003583194165191257,0.9941860465116279,0.005434401566814858 +flat_mae,patch,logistic,adni_ad_vs_cn,23,0.3593813663804626,test,0.7073170731707317,0.05574019789089487,0.5340909090909092,0.08645497420092481,0.535483870967742,0.07311589555727646 +flat_mae,patch,logistic,adni_ad_vs_cn,24,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,24,166.81005372000556,test,0.7317073170731707,0.06757143530901505,0.6479313036690086,0.08623591685943618,0.6532258064516129,0.0898230833690946 +flat_mae,patch,logistic,adni_ad_vs_cn,25,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,25,2.782559402207126,test,0.7804878048780488,0.047625747014646946,0.6328358208955224,0.09328656885900573,0.6177419354838709,0.07696194851516416 +flat_mae,patch,logistic,adni_ad_vs_cn,26,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,26,21.54434690031882,test,0.8292682926829268,0.06125485327200916,0.7759562841530054,0.07677483571327794,0.7854838709677419,0.07949651469136822 +flat_mae,patch,logistic,adni_ad_vs_cn,27,0.3593813663804626,train,0.997289972899729,0.0028970481237489583,0.9961941891766453,0.00410706178964036,0.9941860465116279,0.006215178823624222 +flat_mae,patch,logistic,adni_ad_vs_cn,27,0.3593813663804626,test,0.7560975609756098,0.03239406383587121,0.5119047619047619,0.07574509359716605,0.5338709677419355,0.04871810170689122 +flat_mae,patch,logistic,adni_ad_vs_cn,28,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,28,21.54434690031882,test,0.8292682926829268,0.05182616346510896,0.7402714932126697,0.08835546731515824,0.717741935483871,0.08446826297830635 +flat_mae,patch,logistic,adni_ad_vs_cn,29,0.046415888336127774,train,0.9051490514905149,0.013179320696383065,0.8465732919918743,0.024781174163095044,0.8046059659791273,0.02697858104254195 +flat_mae,patch,logistic,adni_ad_vs_cn,29,0.046415888336127774,test,0.7560975609756098,0.06098466274259032,0.6440972222222222,0.09241747221030037,0.635483870967742,0.0870902707249367 +flat_mae,patch,logistic,adni_ad_vs_cn,30,0.046415888336127774,train,0.907859078590786,0.013180011674895575,0.8563215758131013,0.022920683035225084,0.8225614265757252,0.0255469757196401 +flat_mae,patch,logistic,adni_ad_vs_cn,30,0.046415888336127774,test,0.7073170731707317,0.05681306164804977,0.5340909090909092,0.08578774121939232,0.535483870967742,0.07177831840267589 +flat_mae,patch,logistic,adni_ad_vs_cn,31,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,31,166.81005372000556,test,0.7073170731707317,0.06581706300248026,0.603225806451613,0.08650869472982525,0.603225806451613,0.08836093113998447 +flat_mae,patch,logistic,adni_ad_vs_cn,32,0.046415888336127774,train,0.9051490514905149,0.012685264880735061,0.8448853604929079,0.024567632245317838,0.800558796943052,0.026659936918799674 +flat_mae,patch,logistic,adni_ad_vs_cn,32,0.046415888336127774,test,0.7560975609756098,0.04622321241382579,0.569327731092437,0.09002649008430437,0.567741935483871,0.06821251117310594 +flat_mae,patch,logistic,adni_ad_vs_cn,33,0.3593813663804626,train,0.997289972899729,0.0025979196724798133,0.9961941891766453,0.003676374741342989,0.9941860465116279,0.00557344394851773 +flat_mae,patch,logistic,adni_ad_vs_cn,33,0.3593813663804626,test,0.7073170731707317,0.06340194995096579,0.603225806451613,0.08470539239040872,0.603225806451613,0.0843427761683204 +flat_mae,patch,logistic,adni_ad_vs_cn,34,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,34,2.782559402207126,test,0.6829268292682927,0.06802125503273543,0.5839188134270101,0.08584975342551027,0.5870967741935484,0.08833987530667423 +flat_mae,patch,logistic,adni_ad_vs_cn,35,0.3593813663804626,train,0.994579945799458,0.004008535923501009,0.9923570836785418,0.0057285126158150975,0.9883720930232558,0.008599707882394643 +flat_mae,patch,logistic,adni_ad_vs_cn,35,0.3593813663804626,test,0.7804878048780488,0.04837576002750906,0.6328358208955224,0.09439119830808082,0.6177419354838709,0.07685922371811668 +flat_mae,patch,logistic,adni_ad_vs_cn,36,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,36,166.81005372000556,test,0.6829268292682927,0.06853925869276294,0.6072218128224024,0.0802813898542739,0.6209677419354839,0.08805234578553298 +flat_mae,patch,logistic,adni_ad_vs_cn,37,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,37,2.782559402207126,test,0.7317073170731707,0.05868347423867302,0.5918552036199095,0.09064578600359427,0.5854838709677419,0.08130508468112682 +flat_mae,patch,logistic,adni_ad_vs_cn,38,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,38,166.81005372000556,test,0.6585365853658537,0.0711094873484387,0.5651515151515152,0.08183522417693422,0.5709677419354839,0.08507408780723476 +flat_mae,patch,logistic,adni_ad_vs_cn,39,0.046415888336127774,train,0.9024390243902439,0.01332047764140417,0.8446969696969697,0.023985250500484775,0.8068863505629058,0.02598011424308625 +flat_mae,patch,logistic,adni_ad_vs_cn,39,0.046415888336127774,test,0.6829268292682927,0.04128740259843674,0.4057971014492754,0.014814153536817585,0.45161290322580644,0.027302959782837203 +flat_mae,patch,logistic,adni_ad_vs_cn,40,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,40,166.81005372000556,test,0.7073170731707317,0.06870426497805573,0.6272727272727273,0.08191372207427694,0.6370967741935484,0.08724257921148011 +flat_mae,patch,logistic,adni_ad_vs_cn,41,0.3593813663804626,train,0.994579945799458,0.0038182541154082234,0.9923570836785418,0.005450899496294837,0.9883720930232558,0.008191487026660695 +flat_mae,patch,logistic,adni_ad_vs_cn,41,0.3593813663804626,test,0.8048780487804879,0.049745838253640935,0.6893939393939394,0.09153024242130577,0.667741935483871,0.0805475715979233 +flat_mae,patch,logistic,adni_ad_vs_cn,42,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,42,2.782559402207126,test,0.6829268292682927,0.06810320404321528,0.5839188134270101,0.08258727178429913,0.5870967741935484,0.08620327083229049 +flat_mae,patch,logistic,adni_ad_vs_cn,43,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,43,1291.5496650148827,test,0.7073170731707317,0.061078625175273925,0.5729166666666666,0.08822309372012205,0.5693548387096774,0.08178043394144081 +flat_mae,patch,logistic,adni_ad_vs_cn,44,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,44,21.54434690031882,test,0.7804878048780488,0.06379033174978144,0.7280766396462786,0.0750444962079556,0.7532258064516129,0.08020693131784559 +flat_mae,patch,logistic,adni_ad_vs_cn,45,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,45,21.54434690031882,test,0.7560975609756098,0.05880151430425519,0.6440972222222222,0.09062438890017298,0.635483870967742,0.08711843093952762 +flat_mae,patch,logistic,adni_ad_vs_cn,46,0.3593813663804626,train,0.997289972899729,0.00265245624321664,0.9961941891766453,0.0037532045270821015,0.9941860465116279,0.0056904439171333665 +flat_mae,patch,logistic,adni_ad_vs_cn,46,0.3593813663804626,test,0.6585365853658537,0.056022834640794175,0.4564393939393939,0.06534534917868648,0.4693548387096774,0.0577389784789794 +flat_mae,patch,logistic,adni_ad_vs_cn,47,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,47,21.54434690031882,test,0.7317073170731707,0.06143961986175282,0.6232247284878863,0.08314190073532594,0.6193548387096774,0.07997818380993593 +flat_mae,patch,logistic,adni_ad_vs_cn,48,0.046415888336127774,train,0.8970189701897019,0.013701631159301987,0.8343179884677191,0.025404953683060276,0.7952584435861616,0.027184919500072718 +flat_mae,patch,logistic,adni_ad_vs_cn,48,0.046415888336127774,test,0.7317073170731707,0.05915227025854575,0.5918552036199095,0.0907288989075725,0.5854838709677419,0.0824058507970006 +flat_mae,patch,logistic,adni_ad_vs_cn,49,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,49,166.81005372000556,test,0.7804878048780488,0.05449145128310245,0.6660633484162897,0.0886720133697319,0.6516129032258065,0.08127285084897458 +flat_mae,patch,logistic,adni_ad_vs_cn,50,0.3593813663804626,train,0.994579945799458,0.0037457241286007505,0.9923570836785418,0.005341360652671152,0.9883720930232558,0.008035884903800477 +flat_mae,patch,logistic,adni_ad_vs_cn,50,0.3593813663804626,test,0.7804878048780488,0.047319185268425525,0.6328358208955224,0.09088399060254398,0.6177419354838709,0.07289704398569422 +flat_mae,patch,logistic,adni_ad_vs_cn,51,0.3593813663804626,train,0.994579945799458,0.0039046365387581587,0.9923570836785418,0.00557487101210415,0.9883720930232558,0.008376807458149808 +flat_mae,patch,logistic,adni_ad_vs_cn,51,0.3593813663804626,test,0.6829268292682927,0.06877052340899013,0.6072218128224024,0.0799374023165431,0.6209677419354839,0.08809301648002199 +flat_mae,patch,logistic,adni_ad_vs_cn,52,0.3593813663804626,train,0.991869918699187,0.004579236968287089,0.9884880564885973,0.006593244968247748,0.9825581395348837,0.009824060705220612 +flat_mae,patch,logistic,adni_ad_vs_cn,52,0.3593813663804626,test,0.7073170731707317,0.054522424969082404,0.5340909090909092,0.0841857951687112,0.535483870967742,0.07027009427803489 +flat_mae,patch,logistic,adni_ad_vs_cn,53,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,53,2.782559402207126,test,0.7804878048780488,0.050062091131625214,0.6328358208955224,0.09625303874561758,0.6177419354838709,0.07781215648052588 +flat_mae,patch,logistic,adni_ad_vs_cn,54,0.046415888336127774,train,0.9132791327913279,0.012506801786299182,0.861952861952862,0.022799956815931718,0.8220478264442436,0.025630685049598766 +flat_mae,patch,logistic,adni_ad_vs_cn,54,0.046415888336127774,test,0.7560975609756098,0.061636535129902865,0.6440972222222222,0.09332169105156134,0.635483870967742,0.08957153182353979 +flat_mae,patch,logistic,adni_ad_vs_cn,55,0.046415888336127774,train,0.8970189701897019,0.013847086540371551,0.8343179884677191,0.025998240849637436,0.7952584435861616,0.02791136914273595 +flat_mae,patch,logistic,adni_ad_vs_cn,55,0.046415888336127774,test,0.8048780487804879,0.04319789692371338,0.6554621848739496,0.09667202898949442,0.6338709677419355,0.07456334734941862 +flat_mae,patch,logistic,adni_ad_vs_cn,56,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,56,166.81005372000556,test,0.7560975609756098,0.04705421076528226,0.569327731092437,0.08750044340908436,0.567741935483871,0.06678544793734237 +flat_mae,patch,logistic,adni_ad_vs_cn,57,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,57,166.81005372000556,test,0.7073170731707317,0.07374209442499186,0.6620879120879121,0.07836693545096986,0.7048387096774194,0.08709949004128931 +flat_mae,patch,logistic,adni_ad_vs_cn,58,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,58,21.54434690031882,test,0.7804878048780488,0.06360966257661838,0.7119437939110069,0.08269554143209482,0.7193548387096774,0.08781713920166742 +flat_mae,patch,logistic,adni_ad_vs_cn,59,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,59,21.54434690031882,test,0.7073170731707317,0.06900472011375304,0.603225806451613,0.08927980743512873,0.603225806451613,0.08896849487388707 +flat_mae,patch,logistic,adni_ad_vs_cn,60,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,60,21.54434690031882,test,0.7073170731707317,0.05317464742797856,0.5340909090909092,0.0831571629105031,0.535483870967742,0.07042784397577398 +flat_mae,patch,logistic,adni_ad_vs_cn,61,0.046415888336127774,train,0.8943089430894309,0.014419254310369377,0.8308632543926662,0.02625153128608567,0.7934916591338648,0.027494523316750938 +flat_mae,patch,logistic,adni_ad_vs_cn,61,0.046415888336127774,test,0.7804878048780488,0.049088272902857014,0.6328358208955224,0.09701235331161716,0.6177419354838709,0.07872240371733029 +flat_mae,patch,logistic,adni_ad_vs_cn,62,0.3593813663804626,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,62,0.3593813663804626,test,0.7073170731707317,0.07214848526638923,0.6272727272727273,0.08561514113260653,0.6370967741935484,0.09108808123560504 +flat_mae,patch,logistic,adni_ad_vs_cn,63,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,63,166.81005372000556,test,0.7560975609756098,0.05954966833001967,0.6440972222222222,0.0891801616841854,0.635483870967742,0.08354404298084842 +flat_mae,patch,logistic,adni_ad_vs_cn,64,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,64,21.54434690031882,test,0.7073170731707317,0.05614570574537859,0.5340909090909092,0.08305002286299024,0.535483870967742,0.0706529572219306 +flat_mae,patch,logistic,adni_ad_vs_cn,65,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,65,166.81005372000556,test,0.6341463414634146,0.0697176083478329,0.5199063231850116,0.08209048794836472,0.5209677419354839,0.08479424224067113 +flat_mae,patch,logistic,adni_ad_vs_cn,66,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,66,166.81005372000556,test,0.6829268292682927,0.06603227808397157,0.5839188134270101,0.08505724416622785,0.5870967741935484,0.0880072087554425 +flat_mae,patch,logistic,adni_ad_vs_cn,67,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,67,166.81005372000556,test,0.6341463414634146,0.06835848746024496,0.5199063231850116,0.0818142741344335,0.5209677419354839,0.08434320181252021 +flat_mae,patch,logistic,adni_ad_vs_cn,68,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,68,166.81005372000556,test,0.6585365853658537,0.0710050060656089,0.5651515151515152,0.0828233800733484,0.5709677419354839,0.0877548336700958 +flat_mae,patch,logistic,adni_ad_vs_cn,69,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,69,166.81005372000556,test,0.7804878048780488,0.039659796577950344,0.5886287625418061,0.09373377399957371,0.5838709677419355,0.06754799727219062 +flat_mae,patch,logistic,adni_ad_vs_cn,70,0.3593813663804626,train,0.997289972899729,0.0024389701890996664,0.9961941891766453,0.003447090320247326,0.9941860465116279,0.005232441859173107 +flat_mae,patch,logistic,adni_ad_vs_cn,70,0.3593813663804626,test,0.7804878048780488,0.037534326941996675,0.5886287625418061,0.08634322435858609,0.5838709677419355,0.06174752148863623 +flat_mae,patch,logistic,adni_ad_vs_cn,71,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,71,2.782559402207126,test,0.6585365853658537,0.06598786685655482,0.5370967741935484,0.0797987355946394,0.5370967741935484,0.07986785678448191 +flat_mae,patch,logistic,adni_ad_vs_cn,72,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,72,166.81005372000556,test,0.7317073170731707,0.04938096269314808,0.5512437810945273,0.0865024669120548,0.5516129032258065,0.06923758092150997 +flat_mae,patch,logistic,adni_ad_vs_cn,73,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,73,21.54434690031882,test,0.7073170731707317,0.06355773722166055,0.5729166666666666,0.08979531083044984,0.5693548387096774,0.08401334435157498 +flat_mae,patch,logistic,adni_ad_vs_cn,74,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,74,21.54434690031882,test,0.7804878048780488,0.05703146276740277,0.6660633484162897,0.09117350843058672,0.6516129032258065,0.08408309056300459 +flat_mae,patch,logistic,adni_ad_vs_cn,75,0.3593813663804626,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,75,0.3593813663804626,test,0.8048780487804879,0.050943180255213445,0.6893939393939394,0.0914611600415205,0.667741935483871,0.08044971387157776 +flat_mae,patch,logistic,adni_ad_vs_cn,76,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,76,166.81005372000556,test,0.6829268292682927,0.06288974138668024,0.5547201336675021,0.08344479511000333,0.5532258064516129,0.08006479237559892 +flat_mae,patch,logistic,adni_ad_vs_cn,77,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,77,2.782559402207126,test,0.6829268292682927,0.06468290843541659,0.5839188134270101,0.08113861444005495,0.5870967741935484,0.08497055981561974 +flat_mae,patch,logistic,adni_ad_vs_cn,78,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,78,2.782559402207126,test,0.6097560975609756,0.07967064863932231,0.5494505494505495,0.08448937168146847,0.5725806451612903,0.09640551379734821 +flat_mae,patch,logistic,adni_ad_vs_cn,79,0.3593813663804626,train,0.991869918699187,0.00448040183818428,0.9884880564885973,0.006450589421436196,0.9825581395348837,0.00961202487377912 +flat_mae,patch,logistic,adni_ad_vs_cn,79,0.3593813663804626,test,0.8536585365853658,0.052540435124026244,0.8136363636363637,0.06416458096030163,0.8354838709677419,0.06759610909379467 +flat_mae,patch,logistic,adni_ad_vs_cn,80,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,80,166.81005372000556,test,0.7317073170731707,0.06425727216105592,0.6232247284878863,0.09126148667070301,0.6193548387096774,0.08753776648918267 +flat_mae,patch,logistic,adni_ad_vs_cn,81,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,81,21.54434690031882,test,0.7317073170731707,0.056171827827861384,0.5918552036199095,0.085537811098284,0.5854838709677419,0.07580193974218905 +flat_mae,patch,logistic,adni_ad_vs_cn,82,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,82,2.782559402207126,test,0.7317073170731707,0.06228355262843503,0.6232247284878863,0.08737745092691357,0.6193548387096774,0.0853039774218989 +flat_mae,patch,logistic,adni_ad_vs_cn,83,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,83,166.81005372000556,test,0.6585365853658537,0.06684785214282848,0.5370967741935484,0.08324913335607285,0.5370967741935484,0.08357371212888083 +flat_mae,patch,logistic,adni_ad_vs_cn,84,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,84,166.81005372000556,test,0.7317073170731707,0.06245045627054907,0.6232247284878863,0.08751784621291649,0.6193548387096774,0.08658236357123594 +flat_mae,patch,logistic,adni_ad_vs_cn,85,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,85,166.81005372000556,test,0.8292682926829268,0.05182232952118019,0.7402714932126697,0.08557110964689793,0.717741935483871,0.08196531556246789 +flat_mae,patch,logistic,adni_ad_vs_cn,86,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,86,166.81005372000556,test,0.6341463414634146,0.07579883517992969,0.5684210526315789,0.08220750848301907,0.5887096774193548,0.09361762598966995 +flat_mae,patch,logistic,adni_ad_vs_cn,87,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,87,21.54434690031882,test,0.7317073170731707,0.06450145606590792,0.6479313036690086,0.08251565719106585,0.6532258064516129,0.0863588411112917 +flat_mae,patch,logistic,adni_ad_vs_cn,88,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,88,166.81005372000556,test,0.6585365853658537,0.06919480815286877,0.5370967741935484,0.08318589604135711,0.5370967741935484,0.08422599912259748 +flat_mae,patch,logistic,adni_ad_vs_cn,89,0.046415888336127774,train,0.8943089430894309,0.01321679411073135,0.82903881107666,0.024957699022891567,0.7894444900977895,0.026357555694422732 +flat_mae,patch,logistic,adni_ad_vs_cn,89,0.046415888336127774,test,0.7804878048780488,0.05132588257874008,0.6328358208955224,0.09692641307591987,0.6177419354838709,0.08008538783344239 +flat_mae,patch,logistic,adni_ad_vs_cn,90,0.3593813663804626,train,0.991869918699187,0.004545984635638291,0.9884880564885973,0.006544694461539688,0.9825581395348837,0.009752722852038011 +flat_mae,patch,logistic,adni_ad_vs_cn,90,0.3593813663804626,test,0.8292682926829268,0.054470552108417586,0.7402714932126697,0.08880699242711297,0.717741935483871,0.08498103669227908 +flat_mae,patch,logistic,adni_ad_vs_cn,91,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,91,166.81005372000556,test,0.7560975609756098,0.06273208663542718,0.6693548387096775,0.08656911693631089,0.6693548387096775,0.08935651908283723 +flat_mae,patch,logistic,adni_ad_vs_cn,92,0.046415888336127774,train,0.8997289972899729,0.012965171840296118,0.8395369336545807,0.023865418408029887,0.8010723970745337,0.025992400164343843 +flat_mae,patch,logistic,adni_ad_vs_cn,92,0.046415888336127774,test,0.7073170731707317,0.046769677532204436,0.4831932773109243,0.07460994949827565,0.5016129032258064,0.057441860304977645 +flat_mae,patch,logistic,adni_ad_vs_cn,93,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,93,21.54434690031882,test,0.6829268292682927,0.07027226730828623,0.5839188134270101,0.08378475287482881,0.5870967741935484,0.08655682205539217 +flat_mae,patch,logistic,adni_ad_vs_cn,94,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,94,166.81005372000556,test,0.7073170731707317,0.06046929299733757,0.5729166666666666,0.08540378759933065,0.5693548387096774,0.07933732431949606 +flat_mae,patch,logistic,adni_ad_vs_cn,95,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,95,166.81005372000556,test,0.7073170731707317,0.06829066199100388,0.603225806451613,0.08954515053763401,0.603225806451613,0.09074802818897666 +flat_mae,patch,logistic,adni_ad_vs_cn,96,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,96,21.54434690031882,test,0.6829268292682927,0.0643633351016978,0.5547201336675021,0.08578025354950224,0.5532258064516129,0.08154134431358231 +flat_mae,patch,logistic,adni_ad_vs_cn,97,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,97,166.81005372000556,test,0.6829268292682927,0.05110116408508405,0.4696517412935323,0.06685915160968459,0.4854838709677419,0.05516540410994869 +flat_mae,patch,logistic,adni_ad_vs_cn,98,0.046415888336127774,train,0.9105691056910569,0.012477877104234147,0.8583822759783684,0.02229628830165471,0.8202810419919468,0.02522876625021612 +flat_mae,patch,logistic,adni_ad_vs_cn,98,0.046415888336127774,test,0.7073170731707317,0.05792822077917882,0.5729166666666666,0.08562176077449665,0.5693548387096774,0.08009604347944656 +flat_mae,patch,logistic,adni_ad_vs_cn,99,0.3593813663804626,train,0.994579945799458,0.003980973543170335,0.9923570836785418,0.00569244660437376,0.9883720930232558,0.0085405769618015 +flat_mae,patch,logistic,adni_ad_vs_cn,99,0.3593813663804626,test,0.7560975609756098,0.060292840792278916,0.6440972222222222,0.08925101816367763,0.635483870967742,0.08413436308066136 +flat_mae,patch,logistic,adni_ad_vs_cn,100,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,100,2.782559402207126,test,0.7804878048780488,0.054971497671216285,0.6660633484162897,0.09086355370732972,0.6516129032258065,0.08404206487262263 diff --git a/data_scaling/n100_2/eval_v2/adni_ad_vs_cn__patch__logistic/log.txt b/data_scaling/n100_2/eval_v2/adni_ad_vs_cn__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..fe95e09ce051451747124f71e547517428caceaa --- /dev/null +++ b/data_scaling/n100_2/eval_v2/adni_ad_vs_cn__patch__logistic/log.txt @@ -0,0 +1,240 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: 58906bf7243fb545e1349221e6921a1797e2e666, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-26 17:21:07 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n100_2; eval v2 (adni_ad_vs_cn patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n100_2/eval_v2/adni_ad_vs_cn__patch__logistic +model: flat_mae +representation: patch +dataset: adni_ad_vs_cn +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n100_2/eval_v2/adni_ad_vs_cn__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: adni_ad_vs_cn (flat) +train (n=328): +ADNIDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'bold', 'mean', 'std'], + num_rows: 525 +}), + labels=[0 1], + counts=[251 77] +) + +validation (n=41): +ADNIDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'bold', 'mean', 'std'], + num_rows: 66 +}), + labels=[0 1], + counts=[31 10] +) + +test (n=41): +ADNIDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'bold', 'mean', 'std'], + num_rows: 66 +}), + labels=[0 1], + counts=[32 9] +) + +extracting features for all splits +extract (train) [ 0/164] eta: 0:10:36 time: 3.8806 data: 2.8381 max mem: 2698 +extract (train) [ 20/164] eta: 0:00:53 time: 0.1978 data: 0.0715 max mem: 2851 +extract (train) [ 40/164] eta: 0:00:33 time: 0.1691 data: 0.0529 max mem: 2851 +extract (train) [ 60/164] eta: 0:00:24 time: 0.1505 data: 0.0434 max mem: 2851 +extract (train) [ 80/164] eta: 0:00:17 time: 0.1511 data: 0.0442 max mem: 2851 +extract (train) [100/164] eta: 0:00:13 time: 0.1779 data: 0.0615 max mem: 2851 +extract (train) [120/164] eta: 0:00:08 time: 0.1679 data: 0.0527 max mem: 2851 +extract (train) [140/164] eta: 0:00:04 time: 0.1578 data: 0.0450 max mem: 2851 +extract (train) [160/164] eta: 0:00:00 time: 0.1379 data: 0.0365 max mem: 2851 +extract (train) [163/164] eta: 0:00:00 time: 0.1397 data: 0.0370 max mem: 2851 +extract (train) Total time: 0:00:30 (0.1887 s / it) +extract (validation) [ 0/21] eta: 0:01:09 time: 3.3103 data: 3.2071 max mem: 2851 +extract (validation) [20/21] eta: 0:00:00 time: 0.1334 data: 0.0344 max mem: 2851 +extract (validation) Total time: 0:00:06 (0.2961 s / it) +extract (test) [ 0/21] eta: 0:01:06 time: 3.1682 data: 3.0586 max mem: 2851 +extract (test) [20/21] eta: 0:00:00 time: 0.1391 data: 0.0361 max mem: 2851 +extract (test) Total time: 0:00:06 (0.2961 s / it) +feature extraction time: 0:00:43 +train features: (328, 768) +validation features: (41, 768) +test features: (41, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:--------------|:--------|-------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | adni_ad_vs_cn | | 2.7826 | train | 1 | 0 | 1 | 0 | 1 | 0 | +| flat_mae | patch | logistic | adni_ad_vs_cn | | 2.7826 | test | 0.78049 | 0.060727 | 0.66606 | 0.091923 | 0.65972 | 0.090194 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 1, "C": 21.54434690031882, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06334575997148711, "f1": 0.6232247284878863, "f1_std": 0.08793672358275198, "bacc": 0.6193548387096774, "bacc_std": 0.08598658124385597} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 2, "C": 166.81005372000556, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.05729319689518083, "f1": 0.6440972222222222, "f1_std": 0.08939344958909794, "bacc": 0.635483870967742, "bacc_std": 0.08318923577609934} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 3, "C": 166.81005372000556, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.0642529208256111, "f1": 0.603225806451613, "f1_std": 0.08545209402104924, "bacc": 0.603225806451613, "bacc_std": 0.08584654768981771} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 4, "C": 166.81005372000556, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.06269004420065305, "f1": 0.6917293233082706, "f1_std": 0.0874938582730628, "bacc": 0.685483870967742, "bacc_std": 0.08655773722232406} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 5, "C": 2.782559402207126, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06275453819643223, "f1": 0.5547201336675021, "f1_std": 0.08491336852780806, "bacc": 0.5532258064516129, "bacc_std": 0.08101260002717708} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 6, "C": 2.782559402207126, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.057780050240804244, "f1": 0.5918552036199095, "f1_std": 0.0876345941865284, "bacc": 0.5854838709677419, "bacc_std": 0.0778780013259887} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 7, "C": 0.3593813663804626, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.04773242393868619, "f1": 0.4831932773109243, "f1_std": 0.07144347037645617, "bacc": 0.5016129032258064, "bacc_std": 0.05543197272708444} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 8, "C": 1291.5496650148827, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06260260584176712, "f1": 0.5547201336675021, "f1_std": 0.0806557428489314, "bacc": 0.5532258064516129, "bacc_std": 0.07738504959037022} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 9, "C": 0.3593813663804626, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06424477285185493, "f1": 0.6232247284878863, "f1_std": 0.0876379328977669, "bacc": 0.6193548387096774, "bacc_std": 0.08531574916245467} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 10, "C": 0.046415888336127774, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.05224595199914958, "f1": 0.6117424242424243, "f1_std": 0.08828437133556405, "bacc": 0.6016129032258064, "bacc_std": 0.07641454552603223} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 11, "C": 166.81005372000556, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06143119558173057, "f1": 0.6693548387096775, "f1_std": 0.08116957425182983, "bacc": 0.6693548387096775, "bacc_std": 0.0828933280774168} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 12, "C": 0.3593813663804626, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.053071646319570696, "f1": 0.6117424242424243, "f1_std": 0.08833637303289316, "bacc": 0.6016129032258064, "bacc_std": 0.07667909391958687} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 13, "C": 0.3593813663804626, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06553910422056822, "f1": 0.603225806451613, "f1_std": 0.08612420304963354, "bacc": 0.603225806451613, "bacc_std": 0.08705834875190863} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 14, "C": 0.3593813663804626, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.05011945278919316, "f1": 0.6893939393939394, "f1_std": 0.09085930439083088, "bacc": 0.667741935483871, "bacc_std": 0.08125269764192135} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 15, "C": 0.3593813663804626, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06144877874522968, "f1": 0.6693548387096775, "f1_std": 0.08285053772311125, "bacc": 0.6693548387096775, "bacc_std": 0.08349706357345876} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 16, "C": 0.046415888336127774, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.038199077762942416, "f1": 0.5886287625418061, "f1_std": 0.08830918359080019, "bacc": 0.5838709677419355, "bacc_std": 0.06300486833509769} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 17, "C": 0.005994842503189409, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.032299129855323745, "f1": 0.5119047619047619, "f1_std": 0.07636737572965696, "bacc": 0.5338709677419355, "bacc_std": 0.04939062417608875} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 18, "C": 0.046415888336127774, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.05160161769019545, "f1": 0.6328358208955224, "f1_std": 0.09780049056868827, "bacc": 0.6177419354838709, "bacc_std": 0.07914255134832966} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 19, "C": 2.782559402207126, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06122856798243978, "f1": 0.6232247284878863, "f1_std": 0.08474170587545705, "bacc": 0.6193548387096774, "bacc_std": 0.08302419967412908} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 20, "C": 0.046415888336127774, "split": "test", "acc": 0.8536585365853658, "acc_std": 0.05453789430777899, "f1": 0.8136363636363637, "f1_std": 0.06643653684392899, "bacc": 0.8354838709677419, "bacc_std": 0.06950050262209798} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 21, "C": 21.54434690031882, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.05776112371010982, "f1": 0.5918552036199095, "f1_std": 0.08747961129875054, "bacc": 0.5854838709677419, "bacc_std": 0.07808246094736164} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 22, "C": 2.782559402207126, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06435209512930587, "f1": 0.5547201336675021, "f1_std": 0.08474631590071453, "bacc": 0.5532258064516129, "bacc_std": 0.08136621361475586} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 23, "C": 0.3593813663804626, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.05574019789089487, "f1": 0.5340909090909092, "f1_std": 0.08645497420092481, "bacc": 0.535483870967742, "bacc_std": 0.07311589555727646} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 24, "C": 166.81005372000556, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06757143530901505, "f1": 0.6479313036690086, "f1_std": 0.08623591685943618, "bacc": 0.6532258064516129, "bacc_std": 0.0898230833690946} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 25, "C": 2.782559402207126, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.047625747014646946, "f1": 0.6328358208955224, "f1_std": 0.09328656885900573, "bacc": 0.6177419354838709, "bacc_std": 0.07696194851516416} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 26, "C": 21.54434690031882, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.06125485327200916, "f1": 0.7759562841530054, "f1_std": 0.07677483571327794, "bacc": 0.7854838709677419, "bacc_std": 0.07949651469136822} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 27, "C": 0.3593813663804626, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.03239406383587121, "f1": 0.5119047619047619, "f1_std": 0.07574509359716605, "bacc": 0.5338709677419355, "bacc_std": 0.04871810170689122} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 28, "C": 21.54434690031882, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.05182616346510896, "f1": 0.7402714932126697, "f1_std": 0.08835546731515824, "bacc": 0.717741935483871, "bacc_std": 0.08446826297830635} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 29, "C": 0.046415888336127774, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06098466274259032, "f1": 0.6440972222222222, "f1_std": 0.09241747221030037, "bacc": 0.635483870967742, "bacc_std": 0.0870902707249367} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 30, "C": 0.046415888336127774, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.05681306164804977, "f1": 0.5340909090909092, "f1_std": 0.08578774121939232, "bacc": 0.535483870967742, "bacc_std": 0.07177831840267589} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 31, "C": 166.81005372000556, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06581706300248026, "f1": 0.603225806451613, "f1_std": 0.08650869472982525, "bacc": 0.603225806451613, "bacc_std": 0.08836093113998447} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 32, "C": 0.046415888336127774, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.04622321241382579, "f1": 0.569327731092437, "f1_std": 0.09002649008430437, "bacc": 0.567741935483871, "bacc_std": 0.06821251117310594} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 33, "C": 0.3593813663804626, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06340194995096579, "f1": 0.603225806451613, "f1_std": 0.08470539239040872, "bacc": 0.603225806451613, "bacc_std": 0.0843427761683204} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 34, "C": 2.782559402207126, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06802125503273543, "f1": 0.5839188134270101, "f1_std": 0.08584975342551027, "bacc": 0.5870967741935484, "bacc_std": 0.08833987530667423} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 35, "C": 0.3593813663804626, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.04837576002750906, "f1": 0.6328358208955224, "f1_std": 0.09439119830808082, "bacc": 0.6177419354838709, "bacc_std": 0.07685922371811668} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 36, "C": 166.81005372000556, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06853925869276294, "f1": 0.6072218128224024, "f1_std": 0.0802813898542739, "bacc": 0.6209677419354839, "bacc_std": 0.08805234578553298} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 37, "C": 2.782559402207126, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.05868347423867302, "f1": 0.5918552036199095, "f1_std": 0.09064578600359427, "bacc": 0.5854838709677419, "bacc_std": 0.08130508468112682} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 38, "C": 166.81005372000556, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.0711094873484387, "f1": 0.5651515151515152, "f1_std": 0.08183522417693422, "bacc": 0.5709677419354839, "bacc_std": 0.08507408780723476} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 39, "C": 0.046415888336127774, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.04128740259843674, "f1": 0.4057971014492754, "f1_std": 0.014814153536817585, "bacc": 0.45161290322580644, "bacc_std": 0.027302959782837203} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 40, "C": 166.81005372000556, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06870426497805573, "f1": 0.6272727272727273, "f1_std": 0.08191372207427694, "bacc": 0.6370967741935484, "bacc_std": 0.08724257921148011} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 41, "C": 0.3593813663804626, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.049745838253640935, "f1": 0.6893939393939394, "f1_std": 0.09153024242130577, "bacc": 0.667741935483871, "bacc_std": 0.0805475715979233} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 42, "C": 2.782559402207126, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06810320404321528, "f1": 0.5839188134270101, "f1_std": 0.08258727178429913, "bacc": 0.5870967741935484, "bacc_std": 0.08620327083229049} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 43, "C": 1291.5496650148827, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.061078625175273925, "f1": 0.5729166666666666, "f1_std": 0.08822309372012205, "bacc": 0.5693548387096774, "bacc_std": 0.08178043394144081} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 44, "C": 21.54434690031882, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.06379033174978144, "f1": 0.7280766396462786, "f1_std": 0.0750444962079556, "bacc": 0.7532258064516129, "bacc_std": 0.08020693131784559} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 45, "C": 21.54434690031882, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.05880151430425519, "f1": 0.6440972222222222, "f1_std": 0.09062438890017298, "bacc": 0.635483870967742, "bacc_std": 0.08711843093952762} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 46, "C": 0.3593813663804626, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.056022834640794175, "f1": 0.4564393939393939, "f1_std": 0.06534534917868648, "bacc": 0.4693548387096774, "bacc_std": 0.0577389784789794} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 47, "C": 21.54434690031882, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06143961986175282, "f1": 0.6232247284878863, "f1_std": 0.08314190073532594, "bacc": 0.6193548387096774, "bacc_std": 0.07997818380993593} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 48, "C": 0.046415888336127774, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.05915227025854575, "f1": 0.5918552036199095, "f1_std": 0.0907288989075725, "bacc": 0.5854838709677419, "bacc_std": 0.0824058507970006} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 49, "C": 166.81005372000556, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.05449145128310245, "f1": 0.6660633484162897, "f1_std": 0.0886720133697319, "bacc": 0.6516129032258065, "bacc_std": 0.08127285084897458} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 50, "C": 0.3593813663804626, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.047319185268425525, "f1": 0.6328358208955224, "f1_std": 0.09088399060254398, "bacc": 0.6177419354838709, "bacc_std": 0.07289704398569422} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 51, "C": 0.3593813663804626, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06877052340899013, "f1": 0.6072218128224024, "f1_std": 0.0799374023165431, "bacc": 0.6209677419354839, "bacc_std": 0.08809301648002199} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 52, "C": 0.3593813663804626, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.054522424969082404, "f1": 0.5340909090909092, "f1_std": 0.0841857951687112, "bacc": 0.535483870967742, "bacc_std": 0.07027009427803489} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 53, "C": 2.782559402207126, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.050062091131625214, "f1": 0.6328358208955224, "f1_std": 0.09625303874561758, "bacc": 0.6177419354838709, "bacc_std": 0.07781215648052588} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 54, "C": 0.046415888336127774, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.061636535129902865, "f1": 0.6440972222222222, "f1_std": 0.09332169105156134, "bacc": 0.635483870967742, "bacc_std": 0.08957153182353979} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 55, "C": 0.046415888336127774, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.04319789692371338, "f1": 0.6554621848739496, "f1_std": 0.09667202898949442, "bacc": 0.6338709677419355, "bacc_std": 0.07456334734941862} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 56, "C": 166.81005372000556, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.04705421076528226, "f1": 0.569327731092437, "f1_std": 0.08750044340908436, "bacc": 0.567741935483871, "bacc_std": 0.06678544793734237} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 57, "C": 166.81005372000556, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.07374209442499186, "f1": 0.6620879120879121, "f1_std": 0.07836693545096986, "bacc": 0.7048387096774194, "bacc_std": 0.08709949004128931} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 58, "C": 21.54434690031882, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.06360966257661838, "f1": 0.7119437939110069, "f1_std": 0.08269554143209482, "bacc": 0.7193548387096774, "bacc_std": 0.08781713920166742} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 59, "C": 21.54434690031882, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06900472011375304, "f1": 0.603225806451613, "f1_std": 0.08927980743512873, "bacc": 0.603225806451613, "bacc_std": 0.08896849487388707} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 60, "C": 21.54434690031882, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.05317464742797856, "f1": 0.5340909090909092, "f1_std": 0.0831571629105031, "bacc": 0.535483870967742, "bacc_std": 0.07042784397577398} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 61, "C": 0.046415888336127774, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.049088272902857014, "f1": 0.6328358208955224, "f1_std": 0.09701235331161716, "bacc": 0.6177419354838709, "bacc_std": 0.07872240371733029} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 62, "C": 0.3593813663804626, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.07214848526638923, "f1": 0.6272727272727273, "f1_std": 0.08561514113260653, "bacc": 0.6370967741935484, "bacc_std": 0.09108808123560504} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 63, "C": 166.81005372000556, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.05954966833001967, "f1": 0.6440972222222222, "f1_std": 0.0891801616841854, "bacc": 0.635483870967742, "bacc_std": 0.08354404298084842} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 64, "C": 21.54434690031882, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.05614570574537859, "f1": 0.5340909090909092, "f1_std": 0.08305002286299024, "bacc": 0.535483870967742, "bacc_std": 0.0706529572219306} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 65, "C": 166.81005372000556, "split": "test", "acc": 0.6341463414634146, "acc_std": 0.0697176083478329, "f1": 0.5199063231850116, "f1_std": 0.08209048794836472, "bacc": 0.5209677419354839, "bacc_std": 0.08479424224067113} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 66, "C": 166.81005372000556, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06603227808397157, "f1": 0.5839188134270101, "f1_std": 0.08505724416622785, "bacc": 0.5870967741935484, "bacc_std": 0.0880072087554425} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 67, "C": 166.81005372000556, "split": "test", "acc": 0.6341463414634146, "acc_std": 0.06835848746024496, "f1": 0.5199063231850116, "f1_std": 0.0818142741344335, "bacc": 0.5209677419354839, "bacc_std": 0.08434320181252021} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 68, "C": 166.81005372000556, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.0710050060656089, "f1": 0.5651515151515152, "f1_std": 0.0828233800733484, "bacc": 0.5709677419354839, "bacc_std": 0.0877548336700958} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 69, "C": 166.81005372000556, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.039659796577950344, "f1": 0.5886287625418061, "f1_std": 0.09373377399957371, "bacc": 0.5838709677419355, "bacc_std": 0.06754799727219062} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 70, "C": 0.3593813663804626, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.037534326941996675, "f1": 0.5886287625418061, "f1_std": 0.08634322435858609, "bacc": 0.5838709677419355, "bacc_std": 0.06174752148863623} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 71, "C": 2.782559402207126, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.06598786685655482, "f1": 0.5370967741935484, "f1_std": 0.0797987355946394, "bacc": 0.5370967741935484, "bacc_std": 0.07986785678448191} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 72, "C": 166.81005372000556, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.04938096269314808, "f1": 0.5512437810945273, "f1_std": 0.0865024669120548, "bacc": 0.5516129032258065, "bacc_std": 0.06923758092150997} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 73, "C": 21.54434690031882, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06355773722166055, "f1": 0.5729166666666666, "f1_std": 0.08979531083044984, "bacc": 0.5693548387096774, "bacc_std": 0.08401334435157498} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 74, "C": 21.54434690031882, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.05703146276740277, "f1": 0.6660633484162897, "f1_std": 0.09117350843058672, "bacc": 0.6516129032258065, "bacc_std": 0.08408309056300459} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 75, "C": 0.3593813663804626, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.050943180255213445, "f1": 0.6893939393939394, "f1_std": 0.0914611600415205, "bacc": 0.667741935483871, "bacc_std": 0.08044971387157776} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 76, "C": 166.81005372000556, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06288974138668024, "f1": 0.5547201336675021, "f1_std": 0.08344479511000333, "bacc": 0.5532258064516129, "bacc_std": 0.08006479237559892} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 77, "C": 2.782559402207126, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06468290843541659, "f1": 0.5839188134270101, "f1_std": 0.08113861444005495, "bacc": 0.5870967741935484, "bacc_std": 0.08497055981561974} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 78, "C": 2.782559402207126, "split": "test", "acc": 0.6097560975609756, "acc_std": 0.07967064863932231, "f1": 0.5494505494505495, "f1_std": 0.08448937168146847, "bacc": 0.5725806451612903, "bacc_std": 0.09640551379734821} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 79, "C": 0.3593813663804626, "split": "test", "acc": 0.8536585365853658, "acc_std": 0.052540435124026244, "f1": 0.8136363636363637, "f1_std": 0.06416458096030163, "bacc": 0.8354838709677419, "bacc_std": 0.06759610909379467} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 80, "C": 166.81005372000556, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06425727216105592, "f1": 0.6232247284878863, "f1_std": 0.09126148667070301, "bacc": 0.6193548387096774, "bacc_std": 0.08753776648918267} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 81, "C": 21.54434690031882, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.056171827827861384, "f1": 0.5918552036199095, "f1_std": 0.085537811098284, "bacc": 0.5854838709677419, "bacc_std": 0.07580193974218905} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 82, "C": 2.782559402207126, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06228355262843503, "f1": 0.6232247284878863, "f1_std": 0.08737745092691357, "bacc": 0.6193548387096774, "bacc_std": 0.0853039774218989} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 83, "C": 166.81005372000556, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.06684785214282848, "f1": 0.5370967741935484, "f1_std": 0.08324913335607285, "bacc": 0.5370967741935484, "bacc_std": 0.08357371212888083} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 84, "C": 166.81005372000556, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06245045627054907, "f1": 0.6232247284878863, "f1_std": 0.08751784621291649, "bacc": 0.6193548387096774, "bacc_std": 0.08658236357123594} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 85, "C": 166.81005372000556, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.05182232952118019, "f1": 0.7402714932126697, "f1_std": 0.08557110964689793, "bacc": 0.717741935483871, "bacc_std": 0.08196531556246789} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 86, "C": 166.81005372000556, "split": "test", "acc": 0.6341463414634146, "acc_std": 0.07579883517992969, "f1": 0.5684210526315789, "f1_std": 0.08220750848301907, "bacc": 0.5887096774193548, "bacc_std": 0.09361762598966995} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 87, "C": 21.54434690031882, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06450145606590792, "f1": 0.6479313036690086, "f1_std": 0.08251565719106585, "bacc": 0.6532258064516129, "bacc_std": 0.0863588411112917} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 88, "C": 166.81005372000556, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.06919480815286877, "f1": 0.5370967741935484, "f1_std": 0.08318589604135711, "bacc": 0.5370967741935484, "bacc_std": 0.08422599912259748} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 89, "C": 0.046415888336127774, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.05132588257874008, "f1": 0.6328358208955224, "f1_std": 0.09692641307591987, "bacc": 0.6177419354838709, "bacc_std": 0.08008538783344239} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 90, "C": 0.3593813663804626, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.054470552108417586, "f1": 0.7402714932126697, "f1_std": 0.08880699242711297, "bacc": 0.717741935483871, "bacc_std": 0.08498103669227908} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 91, "C": 166.81005372000556, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06273208663542718, "f1": 0.6693548387096775, "f1_std": 0.08656911693631089, "bacc": 0.6693548387096775, "bacc_std": 0.08935651908283723} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 92, "C": 0.046415888336127774, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.046769677532204436, "f1": 0.4831932773109243, "f1_std": 0.07460994949827565, "bacc": 0.5016129032258064, "bacc_std": 0.057441860304977645} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 93, "C": 21.54434690031882, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.07027226730828623, "f1": 0.5839188134270101, "f1_std": 0.08378475287482881, "bacc": 0.5870967741935484, "bacc_std": 0.08655682205539217} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 94, "C": 166.81005372000556, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06046929299733757, "f1": 0.5729166666666666, "f1_std": 0.08540378759933065, "bacc": 0.5693548387096774, "bacc_std": 0.07933732431949606} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 95, "C": 166.81005372000556, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06829066199100388, "f1": 0.603225806451613, "f1_std": 0.08954515053763401, "bacc": 0.603225806451613, "bacc_std": 0.09074802818897666} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 96, "C": 21.54434690031882, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.0643633351016978, "f1": 0.5547201336675021, "f1_std": 0.08578025354950224, "bacc": 0.5532258064516129, "bacc_std": 0.08154134431358231} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 97, "C": 166.81005372000556, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.05110116408508405, "f1": 0.4696517412935323, "f1_std": 0.06685915160968459, "bacc": 0.4854838709677419, "bacc_std": 0.05516540410994869} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 98, "C": 0.046415888336127774, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.05792822077917882, "f1": 0.5729166666666666, "f1_std": 0.08562176077449665, "bacc": 0.5693548387096774, "bacc_std": 0.08009604347944656} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 99, "C": 0.3593813663804626, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.060292840792278916, "f1": 0.6440972222222222, "f1_std": 0.08925101816367763, "bacc": 0.635483870967742, "bacc_std": 0.08413436308066136} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 100, "C": 2.782559402207126, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.054971497671216285, "f1": 0.6660633484162897, "f1_std": 0.09086355370732972, "bacc": 0.6516129032258065, "bacc_std": 0.08404206487262263} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:--------------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | adni_ad_vs_cn | train | 100 | 80.009 | 189.07 | 0.98244 | 0.038253 | 0.97149 | 0.063429 | 0.96443 | 0.077347 | +| flat_mae | patch | logistic | adni_ad_vs_cn | test | 100 | 80.009 | 189.07 | 0.7322 | 0.050947 | 0.60704 | 0.070466 | 0.60647 | 0.067244 | + + +done! total time: 0:04:32 diff --git a/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/config.yaml b/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b95b084774abd9095a976a90a74f8d803065d46d --- /dev/null +++ b/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/config.yaml @@ -0,0 +1,96 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_probe +remote_root: null +notes: data scaling experiment n100_2; eval v2 (hcpya_task21 patch attn) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: true + norm: true +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn +model: flat_mae +representation: patch +classifier: attn +dataset: hcpya_task21 +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn +remote_dir: null diff --git a/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/eval_log.json b/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/eval_log.json new file mode 100644 index 0000000000000000000000000000000000000000..5d5fd84026375603d93c5e8de18c0a7dc05accb3 --- /dev/null +++ b/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/eval_log.json @@ -0,0 +1 @@ +{"eval/epoch": 17, "eval/id_best": 43, "eval/lr_best": 0.006599999999999999, "eval/wd_best": 0.05, "eval/train/loss": 0.00010308645869372413, "eval/train/acc": 1.0, "eval/train/acc_std": 0.0, "eval/train/f1": 1.0, "eval/train/f1_std": 0.0, "eval/validation/loss": 0.2797021269798279, "eval/validation/acc": 0.9635416666666666, "eval/validation/acc_std": 0.0029880390387508927, "eval/validation/f1": 0.9565002903672909, "eval/validation/f1_std": 0.0038571716148288354, "eval/test/loss": 0.30021265149116516, "eval/test/acc": 0.9632936507936508, "eval/test/acc_std": 0.002535844570267519, "eval/test/f1": 0.9545481066774228, "eval/test/f1_std": 0.003449931567784212} diff --git a/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/eval_log_best.json b/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/eval_log_best.json new file mode 100644 index 0000000000000000000000000000000000000000..a023212bd43b04951d59a7012e682cc0d7e04e09 --- /dev/null +++ b/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/eval_log_best.json @@ -0,0 +1 @@ +{"eval/best/epoch": 17, "eval/best/id_best": 43, "eval/best/lr_best": 0.006599999999999999, "eval/best/wd_best": 0.05, "eval/best/train/loss": 0.00010308645869372413, "eval/best/train/acc": 1.0, "eval/best/train/acc_std": 0.0, "eval/best/train/f1": 1.0, "eval/best/train/f1_std": 0.0, "eval/best/validation/loss": 0.2797021269798279, "eval/best/validation/acc": 0.9635416666666666, "eval/best/validation/acc_std": 0.0029880390387508927, "eval/best/validation/f1": 0.9565002903672909, "eval/best/validation/f1_std": 0.0038571716148288354, "eval/best/test/loss": 0.30021265149116516, "eval/best/test/acc": 0.9632936507936508, "eval/best/test/acc_std": 0.002535844570267519, "eval/best/test/f1": 0.9545481066774228, "eval/best/test/f1_std": 0.003449931567784212} diff --git a/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/eval_log_last.json b/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/eval_log_last.json new file mode 100644 index 0000000000000000000000000000000000000000..22090bb81cc1417296e502a8b74c947b2c21d5e8 --- /dev/null +++ b/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/eval_log_last.json @@ -0,0 +1 @@ +{"eval/last/epoch": 19, "eval/last/id_best": 43, "eval/last/lr_best": 0.006599999999999999, "eval/last/wd_best": 0.05, "eval/last/train/loss": 0.0001044704404193908, "eval/last/train/acc": 1.0, "eval/last/train/acc_std": 0.0, "eval/last/train/f1": 1.0, "eval/last/train/f1_std": 0.0, "eval/last/validation/loss": 0.2780441343784332, "eval/last/validation/acc": 0.9635416666666666, "eval/last/validation/acc_std": 0.0029650230298895195, "eval/last/validation/f1": 0.9561333052343192, "eval/last/validation/f1_std": 0.003821224564275038, "eval/last/test/loss": 0.29874613881111145, "eval/last/test/acc": 0.9636904761904762, "eval/last/test/acc_std": 0.0025437139072971285, "eval/last/test/f1": 0.9550425705771689, "eval/last/test/f1_std": 0.0034556023368705023} diff --git a/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/eval_table.csv b/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..bb8311f7929d3b90950d154ad734d4f564fb2403 --- /dev/null +++ b/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/eval_table.csv @@ -0,0 +1,4 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,hcpya_task21,best,17,0.006599999999999999,0.05,43,"[22, 1.0]",train,0.00010308645869372413,1.0,0.0,1.0,0.0 +flat_mae,patch,attn,hcpya_task21,best,17,0.006599999999999999,0.05,43,"[22, 1.0]",validation,0.2797021269798279,0.9635416666666666,0.0029880390387508927,0.9565002903672909,0.0038571716148288354 +flat_mae,patch,attn,hcpya_task21,best,17,0.006599999999999999,0.05,43,"[22, 1.0]",test,0.30021265149116516,0.9632936507936508,0.002535844570267519,0.9545481066774228,0.003449931567784212 diff --git a/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/eval_table_best.csv b/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/eval_table_best.csv new file mode 100644 index 0000000000000000000000000000000000000000..bb8311f7929d3b90950d154ad734d4f564fb2403 --- /dev/null +++ b/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/eval_table_best.csv @@ -0,0 +1,4 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,hcpya_task21,best,17,0.006599999999999999,0.05,43,"[22, 1.0]",train,0.00010308645869372413,1.0,0.0,1.0,0.0 +flat_mae,patch,attn,hcpya_task21,best,17,0.006599999999999999,0.05,43,"[22, 1.0]",validation,0.2797021269798279,0.9635416666666666,0.0029880390387508927,0.9565002903672909,0.0038571716148288354 +flat_mae,patch,attn,hcpya_task21,best,17,0.006599999999999999,0.05,43,"[22, 1.0]",test,0.30021265149116516,0.9632936507936508,0.002535844570267519,0.9545481066774228,0.003449931567784212 diff --git a/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/eval_table_last.csv b/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/eval_table_last.csv new file mode 100644 index 0000000000000000000000000000000000000000..555b36637d7702e0c3b81351bb51bfdb67d93cd1 --- /dev/null +++ b/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/eval_table_last.csv @@ -0,0 +1,4 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,hcpya_task21,last,19,0.006599999999999999,0.05,43,"[22, 1.0]",train,0.0001044704404193908,1.0,0.0,1.0,0.0 +flat_mae,patch,attn,hcpya_task21,last,19,0.006599999999999999,0.05,43,"[22, 1.0]",validation,0.2780441343784332,0.9635416666666666,0.0029650230298895195,0.9561333052343192,0.003821224564275038 +flat_mae,patch,attn,hcpya_task21,last,19,0.006599999999999999,0.05,43,"[22, 1.0]",test,0.29874613881111145,0.9636904761904762,0.0025437139072971285,0.9550425705771689,0.0034556023368705023 diff --git a/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/log.txt b/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..46acd371b2dc74ad976d91b33c3cf0a37e196f5b --- /dev/null +++ b/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/log.txt @@ -0,0 +1,896 @@ +fMRI foundation model probe eval +version: 0.1.dev65+g4003a1397 +sha: 6c01b606db98add5848cecd23e5d599250c0bf86, status: clean, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-24 19:52:22 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_probe +remote_root: null +notes: data scaling experiment n100_2; eval v2 (hcpya_task21 patch attn) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: true + norm: true +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn +model: flat_mae +representation: patch +classifier: attn +dataset: hcpya_task21 +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: hcpya_task21 (flat) +train (n=18999): +HFDataset( + dataset=Dataset({ + features: ['sub', 'task', 'cond', 'cond_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 18999 +}), + labels=[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20], + counts=[ 832 1248 3201 1660 832 832 832 832 832 1248 1247 1243 832 416 + 416 416 416 416 416 416 416] +) + +validation (n=4032): +HFDataset( + dataset=Dataset({ + features: ['sub', 'task', 'cond', 'cond_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 4032 +}), + labels=[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20], + counts=[176 264 688 352 176 176 176 176 176 264 264 264 176 88 88 88 88 88 + 88 88 88] +) + +test (n=5040): +HFDataset( + dataset=Dataset({ + features: ['sub', 'task', 'cond', 'cond_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 5040 +}), + labels=[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20], + counts=[220 330 860 440 220 220 220 220 220 330 330 330 220 110 110 110 110 110 + 110 110 110] +) + +running backbone on example batch to get embedding dim +embedding feature dim (patch): 768 +initializing sweep of classifier heads +classifiers: +ModuleList( + (0-48): 49 x AttnPoolClassifier( + (kv): Linear(in_features=768, out_features=1536, bias=True) + (linear): Linear(in_features=768, out_features=21, bias=True) + ) +) +classifier params (train): 58.7M (58.7M) +setting up optimizer +total batch size: 128 = 64 bs per gpu x 2 accum +lr: 3.00e-04 +full schedule: epochs = 20 (steps = 4000) (decay = True) +warmup: epochs = 5 (steps = 1000) +start training for 20 epochs +train: [0] [ 0/400] eta: 0:23:15 lr: nan time: 3.4895 data: 2.9038 max mem: 21740 +train: [0] [ 20/400] eta: 0:03:52 lr: 0.000003 loss: 3.0515 (3.0471) grad: 0.2677 (0.2754) time: 0.4678 data: 0.0035 max mem: 22446 +train: [0] [ 40/400] eta: 0:03:12 lr: 0.000006 loss: 3.0028 (3.0091) grad: 0.2713 (0.2735) time: 0.4547 data: 0.0035 max mem: 22446 +train: [0] [ 60/400] eta: 0:02:52 lr: 0.000009 loss: 2.9208 (2.9675) grad: 0.2648 (0.2680) time: 0.4504 data: 0.0034 max mem: 22446 +train: [0] [ 80/400] eta: 0:02:38 lr: 0.000012 loss: 2.8568 (2.9296) grad: 0.2406 (0.2610) time: 0.4627 data: 0.0033 max mem: 22446 +train: [0] [100/400] eta: 0:02:26 lr: 0.000015 loss: 2.7573 (2.8831) grad: 0.2327 (0.2575) time: 0.4544 data: 0.0034 max mem: 22446 +train: [0] [120/400] eta: 0:02:15 lr: 0.000018 loss: 2.6612 (2.8365) grad: 0.2367 (0.2533) time: 0.4659 data: 0.0034 max mem: 22446 +train: [0] [140/400] eta: 0:02:04 lr: 0.000021 loss: 2.5433 (2.7904) grad: 0.2340 (0.2524) time: 0.4582 data: 0.0034 max mem: 22446 +train: [0] [160/400] eta: 0:01:54 lr: 0.000024 loss: 2.4872 (2.7517) grad: 0.2254 (0.2483) time: 0.4510 data: 0.0035 max mem: 22446 +train: [0] [180/400] eta: 0:01:44 lr: 0.000027 loss: 2.4056 (2.7097) grad: 0.2113 (0.2447) time: 0.4539 data: 0.0034 max mem: 22446 +train: [0] [200/400] eta: 0:01:34 lr: 0.000030 loss: 2.3815 (2.6700) grad: 0.2229 (0.2428) time: 0.4629 data: 0.0036 max mem: 22446 +train: [0] [220/400] eta: 0:01:24 lr: 0.000033 loss: 2.2589 (2.6306) grad: 0.2186 (0.2405) time: 0.4600 data: 0.0035 max mem: 22446 +train: [0] [240/400] eta: 0:01:15 lr: 0.000036 loss: 2.1653 (2.5891) grad: 0.2186 (0.2397) time: 0.4504 data: 0.0035 max mem: 22446 +train: [0] [260/400] eta: 0:01:05 lr: 0.000039 loss: 2.1107 (2.5525) grad: 0.2275 (0.2385) time: 0.4743 data: 0.0035 max mem: 22446 +train: [0] [280/400] eta: 0:00:56 lr: 0.000042 loss: 2.1107 (2.5205) grad: 0.2085 (0.2364) time: 0.4505 data: 0.0034 max mem: 22446 +train: [0] [300/400] eta: 0:00:48 lr: 0.000045 loss: 2.0499 (2.4884) grad: 0.2031 (0.2341) time: 0.6454 data: 0.1803 max mem: 22446 +train: [0] [320/400] eta: 0:00:38 lr: 0.000048 loss: 2.0026 (2.4566) grad: 0.2031 (0.2324) time: 0.4684 data: 0.0029 max mem: 22446 +train: [0] [340/400] eta: 0:00:28 lr: 0.000051 loss: 1.9344 (2.4250) grad: 0.2096 (0.2314) time: 0.4529 data: 0.0036 max mem: 22446 +train: [0] [360/400] eta: 0:00:19 lr: 0.000054 loss: 1.9118 (2.3975) grad: 0.2104 (0.2304) time: 0.4489 data: 0.0034 max mem: 22446 +train: [0] [380/400] eta: 0:00:09 lr: 0.000057 loss: 1.9077 (2.3705) grad: 0.2081 (0.2290) time: 0.4616 data: 0.0034 max mem: 22446 +train: [0] [399/400] eta: 0:00:00 lr: 0.000060 loss: 1.8568 (2.3429) grad: 0.2036 (0.2281) time: 0.4484 data: 0.0036 max mem: 22446 +train: [0] Total time: 0:03:09 (0.4750 s / it) +train: [0] Summary: lr: 0.000060 loss: 1.8568 (2.3429) grad: 0.2036 (0.2281) +eval (validation): [0] [ 0/63] eta: 0:03:26 time: 3.2793 data: 2.9889 max mem: 22446 +eval (validation): [0] [20/63] eta: 0:00:21 time: 0.3627 data: 0.0039 max mem: 22446 +eval (validation): [0] [40/63] eta: 0:00:09 time: 0.3467 data: 0.0032 max mem: 22446 +eval (validation): [0] [60/63] eta: 0:00:01 time: 0.3223 data: 0.0033 max mem: 22446 +eval (validation): [0] [62/63] eta: 0:00:00 time: 0.3205 data: 0.0032 max mem: 22446 +eval (validation): [0] Total time: 0:00:24 (0.3943 s / it) +cv: [0] best hparam: (50, 1.0) (048) ('048_lr5.0e+01_wd1.0e+00') loss: 0.432 acc: 0.865 f1: 0.839 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [1] [ 0/400] eta: 0:22:33 lr: nan time: 3.3848 data: 3.0435 max mem: 22446 +train: [1] [ 20/400] eta: 0:03:38 lr: 0.000063 loss: 1.7689 (1.7831) grad: 0.1944 (0.2038) time: 0.4355 data: 0.0030 max mem: 22446 +train: [1] [ 40/400] eta: 0:03:03 lr: 0.000066 loss: 1.7740 (1.7676) grad: 0.1997 (0.2042) time: 0.4424 data: 0.0034 max mem: 22446 +train: [1] [ 60/400] eta: 0:02:47 lr: 0.000069 loss: 1.7173 (1.7483) grad: 0.2005 (0.2023) time: 0.4548 data: 0.0033 max mem: 22446 +train: [1] [ 80/400] eta: 0:02:36 lr: 0.000072 loss: 1.7131 (1.7400) grad: 0.2041 (0.2049) time: 0.4752 data: 0.0036 max mem: 22446 +train: [1] [100/400] eta: 0:02:24 lr: 0.000075 loss: 1.6917 (1.7305) grad: 0.2083 (0.2048) time: 0.4584 data: 0.0034 max mem: 22446 +train: [1] [120/400] eta: 0:02:13 lr: 0.000078 loss: 1.6634 (1.7130) grad: 0.2036 (0.2047) time: 0.4545 data: 0.0033 max mem: 22446 +train: [1] [140/400] eta: 0:02:03 lr: 0.000081 loss: 1.6266 (1.7009) grad: 0.2036 (0.2036) time: 0.4692 data: 0.0034 max mem: 22446 +train: [1] [160/400] eta: 0:01:53 lr: 0.000084 loss: 1.5843 (1.6824) grad: 0.1884 (0.2021) time: 0.4591 data: 0.0036 max mem: 22446 +train: [1] [180/400] eta: 0:01:43 lr: 0.000087 loss: 1.5581 (1.6698) grad: 0.1945 (0.2017) time: 0.4511 data: 0.0035 max mem: 22446 +train: [1] [200/400] eta: 0:01:34 lr: 0.000090 loss: 1.4995 (1.6542) grad: 0.1950 (0.2006) time: 0.4607 data: 0.0036 max mem: 22446 +train: [1] [220/400] eta: 0:01:24 lr: 0.000093 loss: 1.4913 (1.6386) grad: 0.1947 (0.2006) time: 0.4510 data: 0.0034 max mem: 22446 +train: [1] [240/400] eta: 0:01:14 lr: 0.000096 loss: 1.4664 (1.6254) grad: 0.1963 (0.2002) time: 0.4531 data: 0.0034 max mem: 22446 +train: [1] [260/400] eta: 0:01:05 lr: 0.000099 loss: 1.4664 (1.6149) grad: 0.1886 (0.1993) time: 0.4504 data: 0.0035 max mem: 22446 +train: [1] [280/400] eta: 0:00:55 lr: 0.000102 loss: 1.4426 (1.6014) grad: 0.1920 (0.1997) time: 0.4534 data: 0.0035 max mem: 22446 +train: [1] [300/400] eta: 0:00:47 lr: 0.000105 loss: 1.4057 (1.5884) grad: 0.1880 (0.1983) time: 0.6148 data: 0.1795 max mem: 22446 +train: [1] [320/400] eta: 0:00:37 lr: 0.000108 loss: 1.4082 (1.5774) grad: 0.1783 (0.1973) time: 0.4613 data: 0.0034 max mem: 22446 +train: [1] [340/400] eta: 0:00:28 lr: 0.000111 loss: 1.4048 (1.5645) grad: 0.1738 (0.1957) time: 0.4508 data: 0.0035 max mem: 22446 +train: [1] [360/400] eta: 0:00:18 lr: 0.000114 loss: 1.3491 (1.5554) grad: 0.1718 (0.1947) time: 0.4570 data: 0.0035 max mem: 22446 +train: [1] [380/400] eta: 0:00:09 lr: 0.000117 loss: 1.3564 (1.5452) grad: 0.1854 (0.1944) time: 0.4668 data: 0.0035 max mem: 22446 +train: [1] [399/400] eta: 0:00:00 lr: 0.000120 loss: 1.3497 (1.5344) grad: 0.1854 (0.1938) time: 0.4495 data: 0.0036 max mem: 22446 +train: [1] Total time: 0:03:08 (0.4711 s / it) +train: [1] Summary: lr: 0.000120 loss: 1.3497 (1.5344) grad: 0.1854 (0.1938) +eval (validation): [1] [ 0/63] eta: 0:03:23 time: 3.2252 data: 2.9938 max mem: 22446 +eval (validation): [1] [20/63] eta: 0:00:20 time: 0.3411 data: 0.0036 max mem: 22446 +eval (validation): [1] [40/63] eta: 0:00:09 time: 0.3514 data: 0.0030 max mem: 22446 +eval (validation): [1] [60/63] eta: 0:00:01 time: 0.3184 data: 0.0032 max mem: 22446 +eval (validation): [1] [62/63] eta: 0:00:00 time: 0.3213 data: 0.0032 max mem: 22446 +eval (validation): [1] Total time: 0:00:24 (0.3885 s / it) +cv: [1] best hparam: (36, 1.0) (046) ('046_lr3.6e+01_wd1.0e+00') loss: 0.373 acc: 0.899 f1: 0.877 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [2] [ 0/400] eta: 0:22:56 lr: nan time: 3.4423 data: 3.0504 max mem: 22446 +train: [2] [ 20/400] eta: 0:03:48 lr: 0.000123 loss: 1.3165 (1.2960) grad: 0.1940 (0.1952) time: 0.4589 data: 0.0030 max mem: 22446 +train: [2] [ 40/400] eta: 0:03:09 lr: 0.000126 loss: 1.3016 (1.2889) grad: 0.1912 (0.1935) time: 0.4459 data: 0.0036 max mem: 22446 +train: [2] [ 60/400] eta: 0:02:52 lr: 0.000129 loss: 1.2466 (1.2790) grad: 0.1902 (0.1941) time: 0.4719 data: 0.0035 max mem: 22446 +train: [2] [ 80/400] eta: 0:02:39 lr: 0.000132 loss: 1.2597 (1.2842) grad: 0.1944 (0.1954) time: 0.4744 data: 0.0035 max mem: 22446 +train: [2] [100/400] eta: 0:02:27 lr: 0.000135 loss: 1.2749 (1.2791) grad: 0.1979 (0.1969) time: 0.4627 data: 0.0033 max mem: 22446 +train: [2] [120/400] eta: 0:02:16 lr: 0.000138 loss: 1.2601 (1.2767) grad: 0.1962 (0.1972) time: 0.4613 data: 0.0034 max mem: 22446 +train: [2] [140/400] eta: 0:02:05 lr: 0.000141 loss: 1.2057 (1.2652) grad: 0.1918 (0.1971) time: 0.4468 data: 0.0034 max mem: 22446 +train: [2] [160/400] eta: 0:01:55 lr: 0.000144 loss: 1.2169 (1.2650) grad: 0.1940 (0.1984) time: 0.4684 data: 0.0035 max mem: 22446 +train: [2] [180/400] eta: 0:01:44 lr: 0.000147 loss: 1.2258 (1.2599) grad: 0.2016 (0.1984) time: 0.4426 data: 0.0033 max mem: 22446 +train: [2] [200/400] eta: 0:01:35 lr: 0.000150 loss: 1.2102 (1.2502) grad: 0.1972 (0.1988) time: 0.4725 data: 0.0038 max mem: 22446 +train: [2] [220/400] eta: 0:01:25 lr: 0.000153 loss: 1.2247 (1.2512) grad: 0.1974 (0.1993) time: 0.4486 data: 0.0036 max mem: 22446 +train: [2] [240/400] eta: 0:01:15 lr: 0.000156 loss: 1.2041 (1.2426) grad: 0.2104 (0.2006) time: 0.4532 data: 0.0035 max mem: 22446 +train: [2] [260/400] eta: 0:01:05 lr: 0.000159 loss: 1.2041 (1.2411) grad: 0.2262 (0.2030) time: 0.4489 data: 0.0035 max mem: 22446 +train: [2] [280/400] eta: 0:00:56 lr: 0.000162 loss: 1.2256 (1.2398) grad: 0.2306 (0.2050) time: 0.4477 data: 0.0035 max mem: 22446 +train: [2] [300/400] eta: 0:00:47 lr: 0.000165 loss: 1.1597 (1.2349) grad: 0.2312 (0.2074) time: 0.6209 data: 0.1804 max mem: 22446 +train: [2] [320/400] eta: 0:00:38 lr: 0.000168 loss: 1.1244 (1.2308) grad: 0.2435 (0.2105) time: 0.4617 data: 0.0027 max mem: 22446 +train: [2] [340/400] eta: 0:00:28 lr: 0.000171 loss: 1.1276 (1.2246) grad: 0.2348 (0.2109) time: 0.4501 data: 0.0034 max mem: 22446 +train: [2] [360/400] eta: 0:00:18 lr: 0.000174 loss: 1.1123 (1.2188) grad: 0.2197 (0.2116) time: 0.4615 data: 0.0035 max mem: 22446 +train: [2] [380/400] eta: 0:00:09 lr: 0.000177 loss: 1.1123 (1.2140) grad: 0.2298 (0.2157) time: 0.4611 data: 0.0035 max mem: 22446 +train: [2] [399/400] eta: 0:00:00 lr: 0.000180 loss: 1.0732 (1.2064) grad: 0.2728 (0.2186) time: 0.4552 data: 0.0035 max mem: 22446 +train: [2] Total time: 0:03:09 (0.4735 s / it) +train: [2] Summary: lr: 0.000180 loss: 1.0732 (1.2064) grad: 0.2728 (0.2186) +eval (validation): [2] [ 0/63] eta: 0:03:36 time: 3.4410 data: 3.1613 max mem: 22446 +eval (validation): [2] [20/63] eta: 0:00:21 time: 0.3600 data: 0.0029 max mem: 22446 +eval (validation): [2] [40/63] eta: 0:00:09 time: 0.3333 data: 0.0035 max mem: 22446 +eval (validation): [2] [60/63] eta: 0:00:01 time: 0.3457 data: 0.0034 max mem: 22446 +eval (validation): [2] [62/63] eta: 0:00:00 time: 0.3464 data: 0.0034 max mem: 22446 +eval (validation): [2] Total time: 0:00:25 (0.4003 s / it) +cv: [2] best hparam: (19, 1.0) (042) ('042_lr1.9e+01_wd1.0e+00') loss: 0.308 acc: 0.922 f1: 0.895 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [3] [ 0/400] eta: 0:22:41 lr: nan time: 3.4037 data: 3.0673 max mem: 22446 +train: [3] [ 20/400] eta: 0:03:48 lr: 0.000183 loss: 1.0854 (1.0700) grad: 0.2782 (0.2676) time: 0.4604 data: 0.0032 max mem: 22446 +train: [3] [ 40/400] eta: 0:03:08 lr: 0.000186 loss: 1.1078 (1.1061) grad: 0.2849 (0.2864) time: 0.4437 data: 0.0035 max mem: 22446 +train: [3] [ 60/400] eta: 0:02:53 lr: 0.000189 loss: 1.1430 (1.1222) grad: 0.3115 (0.3045) time: 0.4816 data: 0.0036 max mem: 22446 +train: [3] [ 80/400] eta: 0:02:39 lr: 0.000192 loss: 1.1189 (1.1260) grad: 0.2962 (0.3010) time: 0.4658 data: 0.0036 max mem: 22446 +train: [3] [100/400] eta: 0:02:28 lr: 0.000195 loss: 1.0850 (1.1173) grad: 0.2816 (0.2965) time: 0.4806 data: 0.0035 max mem: 22446 +train: [3] [120/400] eta: 0:02:17 lr: 0.000198 loss: 1.0701 (1.1225) grad: 0.2808 (0.3026) time: 0.4629 data: 0.0034 max mem: 22446 +train: [3] [140/400] eta: 0:02:06 lr: 0.000201 loss: 1.1380 (1.1237) grad: 0.3201 (0.3040) time: 0.4548 data: 0.0034 max mem: 22446 +train: [3] [160/400] eta: 0:01:55 lr: 0.000204 loss: 1.1370 (1.1248) grad: 0.3255 (0.3094) time: 0.4630 data: 0.0034 max mem: 22446 +train: [3] [180/400] eta: 0:01:45 lr: 0.000207 loss: 1.1191 (1.1226) grad: 0.3237 (0.3114) time: 0.4661 data: 0.0034 max mem: 22446 +train: [3] [200/400] eta: 0:01:35 lr: 0.000210 loss: 1.1080 (1.1202) grad: 0.3454 (0.3160) time: 0.4716 data: 0.0035 max mem: 22446 +train: [3] [220/400] eta: 0:01:25 lr: 0.000213 loss: 1.1052 (1.1206) grad: 0.3630 (0.3271) time: 0.4583 data: 0.0034 max mem: 22446 +train: [3] [240/400] eta: 0:01:16 lr: 0.000216 loss: 1.1254 (1.1281) grad: 0.3769 (0.3315) time: 0.4542 data: 0.0034 max mem: 22446 +train: [3] [260/400] eta: 0:01:06 lr: 0.000219 loss: 1.1435 (1.1269) grad: 0.3841 (0.3417) time: 0.4562 data: 0.0034 max mem: 22446 +train: [3] [280/400] eta: 0:00:56 lr: 0.000222 loss: 1.1686 (1.1335) grad: 0.4210 (0.3499) time: 0.4531 data: 0.0037 max mem: 22446 +train: [3] [300/400] eta: 0:00:48 lr: 0.000225 loss: 1.1841 (1.1358) grad: 0.4285 (0.3590) time: 0.6547 data: 0.1872 max mem: 22446 +train: [3] [320/400] eta: 0:00:38 lr: 0.000228 loss: 1.1599 (1.1384) grad: 0.4305 (0.3712) time: 0.4638 data: 0.0027 max mem: 22446 +train: [3] [340/400] eta: 0:00:28 lr: 0.000231 loss: 1.0359 (1.1335) grad: 0.4264 (0.3766) time: 0.4520 data: 0.0036 max mem: 22446 +train: [3] [360/400] eta: 0:00:19 lr: 0.000234 loss: 1.0359 (1.1315) grad: 0.4601 (0.3844) time: 0.4712 data: 0.0034 max mem: 22446 +train: [3] [380/400] eta: 0:00:09 lr: 0.000237 loss: 1.1013 (1.1345) grad: 0.5282 (0.3947) time: 0.4542 data: 0.0035 max mem: 22446 +train: [3] [399/400] eta: 0:00:00 lr: 0.000240 loss: 1.1013 (1.1338) grad: 0.5282 (0.4004) time: 0.4506 data: 0.0035 max mem: 22446 +train: [3] Total time: 0:03:11 (0.4786 s / it) +train: [3] Summary: lr: 0.000240 loss: 1.1013 (1.1338) grad: 0.5282 (0.4004) +eval (validation): [3] [ 0/63] eta: 0:03:23 time: 3.2295 data: 2.9485 max mem: 22446 +eval (validation): [3] [20/63] eta: 0:00:21 time: 0.3624 data: 0.0051 max mem: 22446 +eval (validation): [3] [40/63] eta: 0:00:09 time: 0.3445 data: 0.0028 max mem: 22446 +eval (validation): [3] [60/63] eta: 0:00:01 time: 0.3432 data: 0.0033 max mem: 22446 +eval (validation): [3] [62/63] eta: 0:00:00 time: 0.3397 data: 0.0033 max mem: 22446 +eval (validation): [3] Total time: 0:00:25 (0.3997 s / it) +cv: [3] best hparam: (9.8, 1.0) (038) ('038_lr9.8e+00_wd1.0e+00') loss: 0.238 acc: 0.927 f1: 0.914 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [4] [ 0/400] eta: 0:22:17 lr: nan time: 3.3430 data: 3.0040 max mem: 22446 +train: [4] [ 20/400] eta: 0:03:43 lr: 0.000243 loss: 1.2110 (1.2238) grad: 0.5054 (0.5320) time: 0.4513 data: 0.0037 max mem: 22446 +train: [4] [ 40/400] eta: 0:03:08 lr: 0.000246 loss: 1.2635 (1.2825) grad: 0.5195 (0.5606) time: 0.4551 data: 0.0034 max mem: 22446 +train: [4] [ 60/400] eta: 0:02:51 lr: 0.000249 loss: 1.2465 (1.2628) grad: 0.6204 (0.6037) time: 0.4604 data: 0.0036 max mem: 22446 +train: [4] [ 80/400] eta: 0:02:37 lr: 0.000252 loss: 1.2465 (1.2712) grad: 0.7194 (0.6369) time: 0.4601 data: 0.0037 max mem: 22446 +train: [4] [100/400] eta: 0:02:27 lr: 0.000255 loss: 1.2683 (1.2761) grad: 0.6728 (0.6446) time: 0.4829 data: 0.0039 max mem: 22446 +train: [4] [120/400] eta: 0:02:15 lr: 0.000258 loss: 1.3739 (1.3268) grad: 0.7808 (0.6914) time: 0.4556 data: 0.0037 max mem: 22446 +train: [4] [140/400] eta: 0:02:04 lr: 0.000261 loss: 1.5892 (1.3399) grad: 0.8359 (0.7271) time: 0.4468 data: 0.0036 max mem: 22446 +train: [4] [160/400] eta: 0:01:54 lr: 0.000264 loss: 1.4695 (1.3760) grad: 0.8034 (0.7366) time: 0.4665 data: 0.0035 max mem: 22446 +train: [4] [180/400] eta: 0:01:44 lr: 0.000267 loss: 1.4695 (1.3906) grad: 0.9041 (0.8447) time: 0.4634 data: 0.0037 max mem: 22446 +train: [4] [200/400] eta: 0:01:34 lr: 0.000270 loss: 1.3969 (1.3993) grad: 1.0012 (0.8610) time: 0.4573 data: 0.0036 max mem: 22446 +train: [4] [220/400] eta: 0:01:25 lr: 0.000273 loss: 1.4889 (1.4304) grad: 1.0010 (0.8745) time: 0.4649 data: 0.0035 max mem: 22446 +train: [4] [240/400] eta: 0:01:15 lr: 0.000276 loss: 1.7027 (1.4549) grad: 0.9527 (0.8778) time: 0.4654 data: 0.0035 max mem: 22446 +train: [4] [260/400] eta: 0:01:06 lr: 0.000279 loss: 1.7359 (1.4831) grad: 0.9103 (0.8847) time: 0.4622 data: 0.0035 max mem: 22446 +WARNING: classifier 48 (50, 1.0) diverged (loss=63.83 > 60.89) at step 937. Freezing. +train: [4] [280/400] eta: 0:00:56 lr: 0.000282 loss: 1.5376 (1.4922) grad: 1.0027 (0.8966) time: 0.4630 data: 0.0034 max mem: 22446 +train: [4] [300/400] eta: 0:00:48 lr: 0.000285 loss: 1.2707 (1.4746) grad: 0.7992 (0.8858) time: 0.6401 data: 0.1853 max mem: 22446 +train: [4] [320/400] eta: 0:00:38 lr: 0.000288 loss: 1.1047 (1.4509) grad: 0.6937 (0.8728) time: 0.4608 data: 0.0035 max mem: 22446 +train: [4] [340/400] eta: 0:00:28 lr: 0.000291 loss: 1.0521 (1.4288) grad: 0.6110 (0.8573) time: 0.4536 data: 0.0031 max mem: 22446 +train: [4] [360/400] eta: 0:00:19 lr: 0.000294 loss: 1.0968 (1.4192) grad: 0.6110 (0.8507) time: 0.4747 data: 0.0036 max mem: 22446 +train: [4] [380/400] eta: 0:00:09 lr: 0.000297 loss: 1.2922 (1.4125) grad: 0.7048 (0.8447) time: 0.4545 data: 0.0035 max mem: 22446 +train: [4] [399/400] eta: 0:00:00 lr: 0.000300 loss: 1.2996 (1.4080) grad: 0.7782 (0.8635) time: 0.4557 data: 0.0036 max mem: 22446 +train: [4] Total time: 0:03:10 (0.4772 s / it) +train: [4] Summary: lr: 0.000300 loss: 1.2996 (1.4080) grad: 0.7782 (0.8635) +eval (validation): [4] [ 0/63] eta: 0:03:29 time: 3.3229 data: 3.0534 max mem: 22446 +eval (validation): [4] [20/63] eta: 0:00:22 time: 0.3747 data: 0.0025 max mem: 22446 +eval (validation): [4] [40/63] eta: 0:00:09 time: 0.3481 data: 0.0037 max mem: 22446 +eval (validation): [4] [60/63] eta: 0:00:01 time: 0.3424 data: 0.0035 max mem: 22446 +eval (validation): [4] [62/63] eta: 0:00:00 time: 0.3420 data: 0.0034 max mem: 22446 +eval (validation): [4] Total time: 0:00:25 (0.4067 s / it) +cv: [4] best hparam: (8.3, 1.0) (037) ('037_lr8.3e+00_wd1.0e+00') loss: 0.298 acc: 0.925 f1: 0.909 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [5] [ 0/400] eta: 0:22:51 lr: nan time: 3.4283 data: 3.0468 max mem: 22446 +train: [5] [ 20/400] eta: 0:03:40 lr: 0.000300 loss: 0.9793 (1.0898) grad: 0.6019 (0.6255) time: 0.4373 data: 0.0029 max mem: 22446 +train: [5] [ 40/400] eta: 0:03:07 lr: 0.000300 loss: 1.1380 (1.1444) grad: 0.6493 (0.6709) time: 0.4575 data: 0.0032 max mem: 22446 +train: [5] [ 60/400] eta: 0:02:49 lr: 0.000300 loss: 1.1920 (1.1876) grad: 0.6638 (0.6632) time: 0.4542 data: 0.0035 max mem: 22446 +train: [5] [ 80/400] eta: 0:02:37 lr: 0.000300 loss: 1.2797 (1.1973) grad: 0.6685 (0.6677) time: 0.4712 data: 0.0038 max mem: 22446 +train: [5] [100/400] eta: 0:02:25 lr: 0.000300 loss: 1.3081 (1.2414) grad: 0.6866 (0.6984) time: 0.4629 data: 0.0037 max mem: 22446 +train: [5] [120/400] eta: 0:02:14 lr: 0.000300 loss: 1.3590 (1.2838) grad: 0.6880 (0.7013) time: 0.4500 data: 0.0035 max mem: 22446 +train: [5] [140/400] eta: 0:02:03 lr: 0.000300 loss: 1.3115 (1.2737) grad: 0.7069 (0.7050) time: 0.4454 data: 0.0033 max mem: 22446 +train: [5] [160/400] eta: 0:01:53 lr: 0.000299 loss: 1.2305 (1.2884) grad: 0.7069 (0.7054) time: 0.4604 data: 0.0033 max mem: 22446 +train: [5] [180/400] eta: 0:01:43 lr: 0.000299 loss: 1.2305 (1.2846) grad: 0.6889 (0.7098) time: 0.4521 data: 0.0032 max mem: 22446 +train: [5] [200/400] eta: 0:01:33 lr: 0.000299 loss: 1.2111 (1.2787) grad: 0.6810 (0.7055) time: 0.4533 data: 0.0035 max mem: 22446 +train: [5] [220/400] eta: 0:01:24 lr: 0.000299 loss: 1.1648 (1.2722) grad: 0.6839 (0.7198) time: 0.4463 data: 0.0034 max mem: 22446 +train: [5] [240/400] eta: 0:01:14 lr: 0.000299 loss: 1.1341 (1.2570) grad: 0.6889 (0.7182) time: 0.4506 data: 0.0034 max mem: 22446 +train: [5] [260/400] eta: 0:01:04 lr: 0.000299 loss: 1.1443 (1.2622) grad: 0.7131 (0.7197) time: 0.4382 data: 0.0033 max mem: 22446 +train: [5] [280/400] eta: 0:00:55 lr: 0.000298 loss: 1.2137 (1.2659) grad: 0.7191 (0.7292) time: 0.4479 data: 0.0035 max mem: 22446 +train: [5] [300/400] eta: 0:00:47 lr: 0.000298 loss: 1.1761 (1.2568) grad: 0.7762 (0.7337) time: 0.6459 data: 0.1853 max mem: 22446 +train: [5] [320/400] eta: 0:00:37 lr: 0.000298 loss: 1.1882 (1.2533) grad: 0.7327 (0.7315) time: 0.4485 data: 0.0032 max mem: 22446 +train: [5] [340/400] eta: 0:00:28 lr: 0.000298 loss: 1.1882 (1.2482) grad: 0.6913 (0.7288) time: 0.4574 data: 0.0035 max mem: 22446 +train: [5] [360/400] eta: 0:00:18 lr: 0.000297 loss: 1.0544 (1.2385) grad: 0.6764 (0.7288) time: 0.4679 data: 0.0035 max mem: 22446 +train: [5] [380/400] eta: 0:00:09 lr: 0.000297 loss: 1.0342 (1.2337) grad: 0.6764 (0.7323) time: 0.4577 data: 0.0035 max mem: 22446 +train: [5] [399/400] eta: 0:00:00 lr: 0.000297 loss: 1.1123 (1.2262) grad: 0.6679 (0.7306) time: 0.4604 data: 0.0035 max mem: 22446 +train: [5] Total time: 0:03:08 (0.4710 s / it) +train: [5] Summary: lr: 0.000297 loss: 1.1123 (1.2262) grad: 0.6679 (0.7306) +eval (validation): [5] [ 0/63] eta: 0:03:24 time: 3.2430 data: 3.0129 max mem: 22446 +eval (validation): [5] [20/63] eta: 0:00:21 time: 0.3662 data: 0.0043 max mem: 22446 +eval (validation): [5] [40/63] eta: 0:00:09 time: 0.3507 data: 0.0033 max mem: 22446 +eval (validation): [5] [60/63] eta: 0:00:01 time: 0.3279 data: 0.0031 max mem: 22446 +eval (validation): [5] [62/63] eta: 0:00:00 time: 0.3222 data: 0.0031 max mem: 22446 +eval (validation): [5] Total time: 0:00:25 (0.3974 s / it) +cv: [5] best hparam: (8.3, 1.0) (037) ('037_lr8.3e+00_wd1.0e+00') loss: 0.322 acc: 0.928 f1: 0.907 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [6] [ 0/400] eta: 0:22:24 lr: nan time: 3.3617 data: 3.0224 max mem: 22446 +train: [6] [ 20/400] eta: 0:03:42 lr: 0.000296 loss: 0.9415 (0.9634) grad: 0.5855 (0.6209) time: 0.4476 data: 0.0033 max mem: 22446 +train: [6] [ 40/400] eta: 0:03:09 lr: 0.000296 loss: 0.8831 (0.9333) grad: 0.5736 (0.6209) time: 0.4659 data: 0.0035 max mem: 22446 +train: [6] [ 60/400] eta: 0:02:50 lr: 0.000296 loss: 0.9409 (0.9716) grad: 0.6482 (0.6418) time: 0.4506 data: 0.0037 max mem: 22446 +train: [6] [ 80/400] eta: 0:02:39 lr: 0.000295 loss: 1.0271 (0.9912) grad: 0.6653 (0.6333) time: 0.4840 data: 0.0038 max mem: 22446 +train: [6] [100/400] eta: 0:02:26 lr: 0.000295 loss: 1.0171 (1.0003) grad: 0.6099 (0.6368) time: 0.4583 data: 0.0036 max mem: 22446 +train: [6] [120/400] eta: 0:02:15 lr: 0.000295 loss: 0.9381 (0.9893) grad: 0.6121 (0.6365) time: 0.4574 data: 0.0035 max mem: 22446 +train: [6] [140/400] eta: 0:02:04 lr: 0.000294 loss: 0.8828 (0.9908) grad: 0.6706 (0.6495) time: 0.4435 data: 0.0034 max mem: 22446 +train: [6] [160/400] eta: 0:01:54 lr: 0.000294 loss: 0.8828 (0.9863) grad: 0.6314 (0.6431) time: 0.4641 data: 0.0035 max mem: 22446 +train: [6] [180/400] eta: 0:01:44 lr: 0.000293 loss: 0.9114 (0.9878) grad: 0.5833 (0.6403) time: 0.4561 data: 0.0035 max mem: 22446 +train: [6] [200/400] eta: 0:01:34 lr: 0.000293 loss: 0.9619 (0.9949) grad: 0.5833 (0.6345) time: 0.4596 data: 0.0034 max mem: 22446 +train: [6] [220/400] eta: 0:01:25 lr: 0.000292 loss: 0.9034 (0.9864) grad: 0.5568 (0.6296) time: 0.4692 data: 0.0034 max mem: 22446 +train: [6] [240/400] eta: 0:01:15 lr: 0.000292 loss: 0.8862 (0.9805) grad: 0.5687 (0.6232) time: 0.4503 data: 0.0036 max mem: 22446 +train: [6] [260/400] eta: 0:01:05 lr: 0.000291 loss: 0.8015 (0.9676) grad: 0.5687 (0.6210) time: 0.4543 data: 0.0036 max mem: 22446 +train: [6] [280/400] eta: 0:00:56 lr: 0.000291 loss: 0.8680 (0.9683) grad: 0.6385 (0.6197) time: 0.4519 data: 0.0036 max mem: 22446 +train: [6] [300/400] eta: 0:00:48 lr: 0.000290 loss: 0.9739 (0.9666) grad: 0.5912 (0.6177) time: 0.6563 data: 0.1895 max mem: 22446 +train: [6] [320/400] eta: 0:00:38 lr: 0.000290 loss: 0.8435 (0.9610) grad: 0.5496 (0.6141) time: 0.4563 data: 0.0032 max mem: 22446 +train: [6] [340/400] eta: 0:00:28 lr: 0.000289 loss: 0.8271 (0.9554) grad: 0.4891 (0.6079) time: 0.4533 data: 0.0034 max mem: 22446 +train: [6] [360/400] eta: 0:00:19 lr: 0.000288 loss: 0.7490 (0.9460) grad: 0.4492 (0.6006) time: 0.4651 data: 0.0036 max mem: 22446 +train: [6] [380/400] eta: 0:00:09 lr: 0.000288 loss: 0.7886 (0.9410) grad: 0.4586 (0.5943) time: 0.4497 data: 0.0036 max mem: 22446 +train: [6] [399/400] eta: 0:00:00 lr: 0.000287 loss: 0.7863 (0.9293) grad: 0.4468 (0.5865) time: 0.4542 data: 0.0038 max mem: 22446 +train: [6] Total time: 0:03:09 (0.4750 s / it) +train: [6] Summary: lr: 0.000287 loss: 0.7863 (0.9293) grad: 0.4468 (0.5865) +eval (validation): [6] [ 0/63] eta: 0:03:25 time: 3.2622 data: 3.0201 max mem: 22446 +eval (validation): [6] [20/63] eta: 0:00:20 time: 0.3488 data: 0.0103 max mem: 22446 +eval (validation): [6] [40/63] eta: 0:00:09 time: 0.3489 data: 0.0034 max mem: 22446 +eval (validation): [6] [60/63] eta: 0:00:01 time: 0.3266 data: 0.0025 max mem: 22446 +eval (validation): [6] [62/63] eta: 0:00:00 time: 0.3255 data: 0.0028 max mem: 22446 +eval (validation): [6] Total time: 0:00:24 (0.3927 s / it) +cv: [6] best hparam: (12, 1.0) (039) ('039_lr1.2e+01_wd1.0e+00') loss: 0.456 acc: 0.937 f1: 0.926 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [7] [ 0/400] eta: 0:21:49 lr: nan time: 3.2729 data: 2.9364 max mem: 22446 +train: [7] [ 20/400] eta: 0:03:44 lr: 0.000286 loss: 0.7467 (0.7339) grad: 0.4447 (0.4294) time: 0.4556 data: 0.0031 max mem: 22446 +train: [7] [ 40/400] eta: 0:03:08 lr: 0.000286 loss: 0.7467 (0.7492) grad: 0.4351 (0.4349) time: 0.4553 data: 0.0037 max mem: 22446 +train: [7] [ 60/400] eta: 0:02:50 lr: 0.000285 loss: 0.7520 (0.7710) grad: 0.4337 (0.4331) time: 0.4543 data: 0.0035 max mem: 22446 +train: [7] [ 80/400] eta: 0:02:37 lr: 0.000284 loss: 0.7346 (0.7565) grad: 0.4109 (0.4331) time: 0.4643 data: 0.0035 max mem: 22446 +train: [7] [100/400] eta: 0:02:25 lr: 0.000284 loss: 0.6738 (0.7520) grad: 0.3841 (0.4304) time: 0.4583 data: 0.0036 max mem: 22446 +train: [7] [120/400] eta: 0:02:14 lr: 0.000283 loss: 0.6587 (0.7430) grad: 0.3973 (0.4289) time: 0.4593 data: 0.0036 max mem: 22446 +train: [7] [140/400] eta: 0:02:04 lr: 0.000282 loss: 0.7005 (0.7444) grad: 0.4135 (0.4355) time: 0.4607 data: 0.0036 max mem: 22446 +train: [7] [160/400] eta: 0:01:54 lr: 0.000282 loss: 0.7027 (0.7404) grad: 0.4270 (0.4325) time: 0.4546 data: 0.0034 max mem: 22446 +train: [7] [180/400] eta: 0:01:44 lr: 0.000281 loss: 0.6994 (0.7414) grad: 0.4021 (0.4288) time: 0.4579 data: 0.0035 max mem: 22446 +train: [7] [200/400] eta: 0:01:34 lr: 0.000280 loss: 0.6771 (0.7340) grad: 0.4312 (0.4282) time: 0.4529 data: 0.0035 max mem: 22446 +train: [7] [220/400] eta: 0:01:24 lr: 0.000279 loss: 0.6484 (0.7307) grad: 0.4424 (0.4273) time: 0.4586 data: 0.0034 max mem: 22446 +train: [7] [240/400] eta: 0:01:14 lr: 0.000278 loss: 0.6827 (0.7317) grad: 0.4251 (0.4267) time: 0.4513 data: 0.0034 max mem: 22446 +train: [7] [260/400] eta: 0:01:05 lr: 0.000278 loss: 0.6718 (0.7279) grad: 0.4285 (0.4259) time: 0.4510 data: 0.0034 max mem: 22446 +train: [7] [280/400] eta: 0:00:55 lr: 0.000277 loss: 0.6653 (0.7302) grad: 0.3817 (0.4219) time: 0.4481 data: 0.0035 max mem: 22446 +train: [7] [300/400] eta: 0:00:47 lr: 0.000276 loss: 0.6962 (0.7305) grad: 0.3649 (0.4185) time: 0.6246 data: 0.1819 max mem: 22446 +train: [7] [320/400] eta: 0:00:37 lr: 0.000275 loss: 0.6665 (0.7269) grad: 0.3528 (0.4145) time: 0.4409 data: 0.0030 max mem: 22446 +train: [7] [340/400] eta: 0:00:28 lr: 0.000274 loss: 0.6324 (0.7205) grad: 0.3528 (0.4128) time: 0.4550 data: 0.0036 max mem: 22446 +train: [7] [360/400] eta: 0:00:18 lr: 0.000273 loss: 0.5845 (0.7137) grad: 0.3431 (0.4094) time: 0.4656 data: 0.0038 max mem: 22446 +train: [7] [380/400] eta: 0:00:09 lr: 0.000272 loss: 0.5991 (0.7080) grad: 0.3322 (0.4058) time: 0.4499 data: 0.0038 max mem: 22446 +train: [7] [399/400] eta: 0:00:00 lr: 0.000271 loss: 0.6135 (0.7026) grad: 0.3369 (0.4029) time: 0.4525 data: 0.0034 max mem: 22446 +train: [7] Total time: 0:03:08 (0.4708 s / it) +train: [7] Summary: lr: 0.000271 loss: 0.6135 (0.7026) grad: 0.3369 (0.4029) +eval (validation): [7] [ 0/63] eta: 0:04:24 time: 4.2033 data: 3.8918 max mem: 22446 +eval (validation): [7] [20/63] eta: 0:00:23 time: 0.3605 data: 0.0026 max mem: 22446 +eval (validation): [7] [40/63] eta: 0:00:10 time: 0.3492 data: 0.0033 max mem: 22446 +eval (validation): [7] [60/63] eta: 0:00:01 time: 0.3471 data: 0.0035 max mem: 22446 +eval (validation): [7] [62/63] eta: 0:00:00 time: 0.3427 data: 0.0035 max mem: 22446 +eval (validation): [7] Total time: 0:00:26 (0.4178 s / it) +cv: [7] best hparam: (8.3, 1.0) (037) ('037_lr8.3e+00_wd1.0e+00') loss: 0.300 acc: 0.943 f1: 0.929 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [8] [ 0/400] eta: 0:22:01 lr: nan time: 3.3047 data: 2.9574 max mem: 22446 +train: [8] [ 20/400] eta: 0:03:48 lr: 0.000270 loss: 0.5605 (0.5891) grad: 0.3333 (0.3335) time: 0.4669 data: 0.0034 max mem: 22446 +train: [8] [ 40/400] eta: 0:03:12 lr: 0.000270 loss: 0.5699 (0.5970) grad: 0.3270 (0.3327) time: 0.4616 data: 0.0034 max mem: 22446 +train: [8] [ 60/400] eta: 0:02:53 lr: 0.000269 loss: 0.5444 (0.5897) grad: 0.2942 (0.3204) time: 0.4628 data: 0.0037 max mem: 22446 +train: [8] [ 80/400] eta: 0:02:39 lr: 0.000268 loss: 0.5262 (0.5808) grad: 0.2803 (0.3120) time: 0.4679 data: 0.0037 max mem: 22446 +train: [8] [100/400] eta: 0:02:27 lr: 0.000267 loss: 0.5230 (0.5716) grad: 0.2607 (0.3005) time: 0.4608 data: 0.0037 max mem: 22446 +train: [8] [120/400] eta: 0:02:15 lr: 0.000266 loss: 0.5641 (0.5849) grad: 0.2801 (0.3059) time: 0.4486 data: 0.0035 max mem: 22446 +train: [8] [140/400] eta: 0:02:05 lr: 0.000265 loss: 0.5899 (0.5854) grad: 0.3360 (0.3110) time: 0.4621 data: 0.0035 max mem: 22446 +train: [8] [160/400] eta: 0:01:54 lr: 0.000264 loss: 0.5550 (0.5837) grad: 0.2936 (0.3099) time: 0.4450 data: 0.0034 max mem: 22446 +train: [8] [180/400] eta: 0:01:44 lr: 0.000263 loss: 0.5436 (0.5811) grad: 0.2813 (0.3073) time: 0.4661 data: 0.0036 max mem: 22446 +train: [8] [200/400] eta: 0:01:34 lr: 0.000262 loss: 0.5331 (0.5815) grad: 0.2917 (0.3094) time: 0.4519 data: 0.0035 max mem: 22446 +train: [8] [220/400] eta: 0:01:24 lr: 0.000260 loss: 0.5769 (0.5830) grad: 0.3058 (0.3093) time: 0.4499 data: 0.0035 max mem: 22446 +train: [8] [240/400] eta: 0:01:15 lr: 0.000259 loss: 0.6026 (0.5871) grad: 0.3036 (0.3093) time: 0.4502 data: 0.0036 max mem: 22446 +train: [8] [260/400] eta: 0:01:05 lr: 0.000258 loss: 0.5892 (0.5849) grad: 0.2983 (0.3095) time: 0.4437 data: 0.0034 max mem: 22446 +train: [8] [280/400] eta: 0:00:55 lr: 0.000257 loss: 0.5792 (0.5884) grad: 0.3205 (0.3114) time: 0.4539 data: 0.0036 max mem: 22446 +train: [8] [300/400] eta: 0:00:47 lr: 0.000256 loss: 0.5698 (0.5871) grad: 0.3240 (0.3110) time: 0.6282 data: 0.1863 max mem: 22446 +train: [8] [320/400] eta: 0:00:38 lr: 0.000255 loss: 0.5313 (0.5823) grad: 0.2796 (0.3078) time: 0.4455 data: 0.0035 max mem: 22446 +train: [8] [340/400] eta: 0:00:28 lr: 0.000254 loss: 0.5189 (0.5798) grad: 0.2442 (0.3037) time: 0.4696 data: 0.0033 max mem: 22446 +train: [8] [360/400] eta: 0:00:18 lr: 0.000253 loss: 0.5117 (0.5737) grad: 0.2166 (0.2986) time: 0.4640 data: 0.0035 max mem: 22446 +train: [8] [380/400] eta: 0:00:09 lr: 0.000252 loss: 0.4781 (0.5701) grad: 0.2303 (0.2952) time: 0.4604 data: 0.0037 max mem: 22446 +train: [8] [399/400] eta: 0:00:00 lr: 0.000250 loss: 0.4811 (0.5665) grad: 0.2302 (0.2914) time: 0.4579 data: 0.0036 max mem: 22446 +train: [8] Total time: 0:03:09 (0.4733 s / it) +train: [8] Summary: lr: 0.000250 loss: 0.4811 (0.5665) grad: 0.2302 (0.2914) +eval (validation): [8] [ 0/63] eta: 0:03:38 time: 3.4705 data: 3.1741 max mem: 22446 +eval (validation): [8] [20/63] eta: 0:00:22 time: 0.3768 data: 0.0037 max mem: 22446 +eval (validation): [8] [40/63] eta: 0:00:10 time: 0.3679 data: 0.0035 max mem: 22446 +eval (validation): [8] [60/63] eta: 0:00:01 time: 0.3456 data: 0.0032 max mem: 22446 +eval (validation): [8] [62/63] eta: 0:00:00 time: 0.3440 data: 0.0032 max mem: 22446 +eval (validation): [8] Total time: 0:00:26 (0.4175 s / it) +cv: [8] best hparam: (26, 1.0) (044) ('044_lr2.6e+01_wd1.0e+00') loss: 0.723 acc: 0.949 f1: 0.940 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [9] [ 0/400] eta: 0:22:43 lr: nan time: 3.4097 data: 3.0668 max mem: 22446 +train: [9] [ 20/400] eta: 0:03:44 lr: 0.000249 loss: 0.4806 (0.5352) grad: 0.2084 (0.2138) time: 0.4488 data: 0.0037 max mem: 22446 +train: [9] [ 40/400] eta: 0:03:08 lr: 0.000248 loss: 0.4799 (0.5103) grad: 0.2106 (0.2163) time: 0.4533 data: 0.0030 max mem: 22446 +train: [9] [ 60/400] eta: 0:02:51 lr: 0.000247 loss: 0.4799 (0.5085) grad: 0.2145 (0.2132) time: 0.4661 data: 0.0036 max mem: 22446 +train: [9] [ 80/400] eta: 0:02:37 lr: 0.000246 loss: 0.5009 (0.5061) grad: 0.2145 (0.2126) time: 0.4546 data: 0.0034 max mem: 22446 +train: [9] [100/400] eta: 0:02:25 lr: 0.000244 loss: 0.4530 (0.5002) grad: 0.2087 (0.2121) time: 0.4578 data: 0.0035 max mem: 22446 +train: [9] [120/400] eta: 0:02:15 lr: 0.000243 loss: 0.5037 (0.5043) grad: 0.2047 (0.2119) time: 0.4708 data: 0.0035 max mem: 22446 +train: [9] [140/400] eta: 0:02:04 lr: 0.000242 loss: 0.5037 (0.5041) grad: 0.2047 (0.2125) time: 0.4542 data: 0.0035 max mem: 22446 +train: [9] [160/400] eta: 0:01:54 lr: 0.000241 loss: 0.4789 (0.5034) grad: 0.2171 (0.2136) time: 0.4576 data: 0.0035 max mem: 22446 +train: [9] [180/400] eta: 0:01:44 lr: 0.000240 loss: 0.4802 (0.5016) grad: 0.2137 (0.2136) time: 0.4615 data: 0.0035 max mem: 22446 +train: [9] [200/400] eta: 0:01:34 lr: 0.000238 loss: 0.4800 (0.4995) grad: 0.2114 (0.2136) time: 0.4598 data: 0.0034 max mem: 22446 +train: [9] [220/400] eta: 0:01:24 lr: 0.000237 loss: 0.4776 (0.5011) grad: 0.2127 (0.2136) time: 0.4529 data: 0.0033 max mem: 22446 +train: [9] [240/400] eta: 0:01:15 lr: 0.000236 loss: 0.4828 (0.4991) grad: 0.2180 (0.2141) time: 0.4690 data: 0.0033 max mem: 22446 +train: [9] [260/400] eta: 0:01:05 lr: 0.000234 loss: 0.4868 (0.5004) grad: 0.2107 (0.2145) time: 0.4597 data: 0.0033 max mem: 22446 +train: [9] [280/400] eta: 0:00:56 lr: 0.000233 loss: 0.4868 (0.4988) grad: 0.2100 (0.2146) time: 0.4573 data: 0.0035 max mem: 22446 +train: [9] [300/400] eta: 0:00:47 lr: 0.000232 loss: 0.4844 (0.4987) grad: 0.2085 (0.2140) time: 0.6251 data: 0.1884 max mem: 22446 +train: [9] [320/400] eta: 0:00:38 lr: 0.000230 loss: 0.4715 (0.4957) grad: 0.1857 (0.2121) time: 0.4437 data: 0.0030 max mem: 22446 +train: [9] [340/400] eta: 0:00:28 lr: 0.000229 loss: 0.4569 (0.4939) grad: 0.1852 (0.2112) time: 0.4529 data: 0.0035 max mem: 22446 +train: [9] [360/400] eta: 0:00:18 lr: 0.000228 loss: 0.4673 (0.4921) grad: 0.1961 (0.2100) time: 0.4532 data: 0.0036 max mem: 22446 +train: [9] [380/400] eta: 0:00:09 lr: 0.000226 loss: 0.4580 (0.4906) grad: 0.1950 (0.2095) time: 0.4537 data: 0.0035 max mem: 22446 +train: [9] [399/400] eta: 0:00:00 lr: 0.000225 loss: 0.4633 (0.4900) grad: 0.1845 (0.2079) time: 0.4699 data: 0.0035 max mem: 22446 +train: [9] Total time: 0:03:09 (0.4738 s / it) +train: [9] Summary: lr: 0.000225 loss: 0.4633 (0.4900) grad: 0.1845 (0.2079) +eval (validation): [9] [ 0/63] eta: 0:03:33 time: 3.3827 data: 3.0987 max mem: 22446 +eval (validation): [9] [20/63] eta: 0:00:22 time: 0.3904 data: 0.0029 max mem: 22446 +eval (validation): [9] [40/63] eta: 0:00:10 time: 0.3751 data: 0.0034 max mem: 22446 +eval (validation): [9] [60/63] eta: 0:00:01 time: 0.3264 data: 0.0031 max mem: 22446 +eval (validation): [9] [62/63] eta: 0:00:00 time: 0.3309 data: 0.0031 max mem: 22446 +eval (validation): [9] Total time: 0:00:26 (0.4165 s / it) +cv: [9] best hparam: (36, 1.0) (046) ('046_lr3.6e+01_wd1.0e+00') loss: 1.762 acc: 0.950 f1: 0.942 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [10] [ 0/400] eta: 0:23:20 lr: nan time: 3.5003 data: 3.1161 max mem: 22446 +train: [10] [ 20/400] eta: 0:03:51 lr: 0.000224 loss: 0.4989 (0.5118) grad: 0.1798 (0.1901) time: 0.4652 data: 0.0026 max mem: 22446 +train: [10] [ 40/400] eta: 0:03:13 lr: 0.000222 loss: 0.4632 (0.4835) grad: 0.1798 (0.1856) time: 0.4622 data: 0.0033 max mem: 22446 +train: [10] [ 60/400] eta: 0:02:55 lr: 0.000221 loss: 0.4590 (0.4738) grad: 0.1724 (0.1820) time: 0.4699 data: 0.0034 max mem: 22446 +train: [10] [ 80/400] eta: 0:02:40 lr: 0.000220 loss: 0.4354 (0.4660) grad: 0.1620 (0.1776) time: 0.4606 data: 0.0033 max mem: 22446 +train: [10] [100/400] eta: 0:02:27 lr: 0.000218 loss: 0.4354 (0.4639) grad: 0.1650 (0.1763) time: 0.4445 data: 0.0033 max mem: 22446 +train: [10] [120/400] eta: 0:02:16 lr: 0.000217 loss: 0.4463 (0.4574) grad: 0.1760 (0.1764) time: 0.4701 data: 0.0035 max mem: 22446 +train: [10] [140/400] eta: 0:02:05 lr: 0.000215 loss: 0.4418 (0.4564) grad: 0.1807 (0.1754) time: 0.4533 data: 0.0034 max mem: 22446 +train: [10] [160/400] eta: 0:01:54 lr: 0.000214 loss: 0.4385 (0.4528) grad: 0.1673 (0.1754) time: 0.4530 data: 0.0033 max mem: 22446 +train: [10] [180/400] eta: 0:01:44 lr: 0.000213 loss: 0.4279 (0.4518) grad: 0.1649 (0.1753) time: 0.4568 data: 0.0035 max mem: 22446 +train: [10] [200/400] eta: 0:01:34 lr: 0.000211 loss: 0.4655 (0.4525) grad: 0.1695 (0.1756) time: 0.4551 data: 0.0034 max mem: 22446 +train: [10] [220/400] eta: 0:01:25 lr: 0.000210 loss: 0.4522 (0.4517) grad: 0.1614 (0.1744) time: 0.4559 data: 0.0033 max mem: 22446 +train: [10] [240/400] eta: 0:01:15 lr: 0.000208 loss: 0.4388 (0.4500) grad: 0.1627 (0.1741) time: 0.4737 data: 0.0034 max mem: 22446 +train: [10] [260/400] eta: 0:01:05 lr: 0.000207 loss: 0.4372 (0.4496) grad: 0.1627 (0.1738) time: 0.4566 data: 0.0034 max mem: 22446 +train: [10] [280/400] eta: 0:00:56 lr: 0.000205 loss: 0.4430 (0.4483) grad: 0.1551 (0.1723) time: 0.4472 data: 0.0035 max mem: 22446 +train: [10] [300/400] eta: 0:00:47 lr: 0.000204 loss: 0.4374 (0.4466) grad: 0.1487 (0.1707) time: 0.6112 data: 0.1793 max mem: 22446 +train: [10] [320/400] eta: 0:00:38 lr: 0.000202 loss: 0.4259 (0.4456) grad: 0.1487 (0.1696) time: 0.4415 data: 0.0031 max mem: 22446 +train: [10] [340/400] eta: 0:00:28 lr: 0.000201 loss: 0.4259 (0.4448) grad: 0.1532 (0.1686) time: 0.4517 data: 0.0035 max mem: 22446 +train: [10] [360/400] eta: 0:00:18 lr: 0.000199 loss: 0.4212 (0.4431) grad: 0.1468 (0.1672) time: 0.4457 data: 0.0034 max mem: 22446 +train: [10] [380/400] eta: 0:00:09 lr: 0.000198 loss: 0.4198 (0.4416) grad: 0.1331 (0.1655) time: 0.4525 data: 0.0034 max mem: 22446 +train: [10] [399/400] eta: 0:00:00 lr: 0.000196 loss: 0.4191 (0.4411) grad: 0.1334 (0.1646) time: 0.4694 data: 0.0035 max mem: 22446 +train: [10] Total time: 0:03:09 (0.4727 s / it) +train: [10] Summary: lr: 0.000196 loss: 0.4191 (0.4411) grad: 0.1334 (0.1646) +eval (validation): [10] [ 0/63] eta: 0:03:26 time: 3.2825 data: 3.0523 max mem: 22446 +eval (validation): [10] [20/63] eta: 0:00:21 time: 0.3695 data: 0.0050 max mem: 22446 +eval (validation): [10] [40/63] eta: 0:00:09 time: 0.3229 data: 0.0029 max mem: 22446 +eval (validation): [10] [60/63] eta: 0:00:01 time: 0.3316 data: 0.0032 max mem: 22446 +eval (validation): [10] [62/63] eta: 0:00:00 time: 0.3340 data: 0.0032 max mem: 22446 +eval (validation): [10] Total time: 0:00:24 (0.3930 s / it) +cv: [10] best hparam: (26, 1.0) (044) ('044_lr2.6e+01_wd1.0e+00') loss: 0.509 acc: 0.957 f1: 0.950 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [11] [ 0/400] eta: 0:22:59 lr: nan time: 3.4498 data: 3.0546 max mem: 22446 +train: [11] [ 20/400] eta: 0:03:59 lr: 0.000195 loss: 0.4001 (0.4039) grad: 0.1312 (0.1390) time: 0.4879 data: 0.0031 max mem: 22446 +train: [11] [ 40/400] eta: 0:03:16 lr: 0.000193 loss: 0.4133 (0.4116) grad: 0.1314 (0.1376) time: 0.4610 data: 0.0035 max mem: 22446 +train: [11] [ 60/400] eta: 0:02:57 lr: 0.000192 loss: 0.4240 (0.4169) grad: 0.1355 (0.1387) time: 0.4742 data: 0.0036 max mem: 22446 +train: [11] [ 80/400] eta: 0:02:42 lr: 0.000190 loss: 0.4153 (0.4151) grad: 0.1477 (0.1417) time: 0.4661 data: 0.0036 max mem: 22446 +train: [11] [100/400] eta: 0:02:29 lr: 0.000189 loss: 0.4052 (0.4119) grad: 0.1481 (0.1427) time: 0.4518 data: 0.0034 max mem: 22446 +train: [11] [120/400] eta: 0:02:17 lr: 0.000187 loss: 0.4118 (0.4130) grad: 0.1442 (0.1449) time: 0.4649 data: 0.0035 max mem: 22446 +train: [11] [140/400] eta: 0:02:06 lr: 0.000186 loss: 0.4133 (0.4153) grad: 0.1474 (0.1448) time: 0.4526 data: 0.0035 max mem: 22446 +train: [11] [160/400] eta: 0:01:55 lr: 0.000184 loss: 0.4332 (0.4162) grad: 0.1363 (0.1452) time: 0.4565 data: 0.0035 max mem: 22446 +train: [11] [180/400] eta: 0:01:46 lr: 0.000183 loss: 0.4003 (0.4141) grad: 0.1361 (0.1451) time: 0.4929 data: 0.0036 max mem: 22446 +train: [11] [200/400] eta: 0:01:36 lr: 0.000181 loss: 0.3943 (0.4117) grad: 0.1360 (0.1439) time: 0.4571 data: 0.0036 max mem: 22446 +train: [11] [220/400] eta: 0:01:26 lr: 0.000180 loss: 0.3962 (0.4108) grad: 0.1285 (0.1425) time: 0.4536 data: 0.0035 max mem: 22446 +train: [11] [240/400] eta: 0:01:16 lr: 0.000178 loss: 0.3962 (0.4090) grad: 0.1287 (0.1422) time: 0.4647 data: 0.0035 max mem: 22446 +train: [11] [260/400] eta: 0:01:06 lr: 0.000177 loss: 0.4052 (0.4101) grad: 0.1418 (0.1418) time: 0.4640 data: 0.0035 max mem: 22446 +train: [11] [280/400] eta: 0:00:57 lr: 0.000175 loss: 0.4150 (0.4095) grad: 0.1434 (0.1421) time: 0.4668 data: 0.0038 max mem: 22446 +train: [11] [300/400] eta: 0:00:48 lr: 0.000174 loss: 0.4249 (0.4104) grad: 0.1430 (0.1415) time: 0.6227 data: 0.1828 max mem: 22446 +train: [11] [320/400] eta: 0:00:38 lr: 0.000172 loss: 0.4114 (0.4096) grad: 0.1334 (0.1413) time: 0.4515 data: 0.0031 max mem: 22446 +train: [11] [340/400] eta: 0:00:28 lr: 0.000170 loss: 0.3944 (0.4086) grad: 0.1293 (0.1404) time: 0.4525 data: 0.0036 max mem: 22446 +train: [11] [360/400] eta: 0:00:19 lr: 0.000169 loss: 0.3905 (0.4076) grad: 0.1215 (0.1395) time: 0.4462 data: 0.0035 max mem: 22446 +train: [11] [380/400] eta: 0:00:09 lr: 0.000167 loss: 0.3905 (0.4074) grad: 0.1215 (0.1388) time: 0.4591 data: 0.0034 max mem: 22446 +train: [11] [399/400] eta: 0:00:00 lr: 0.000166 loss: 0.3944 (0.4074) grad: 0.1247 (0.1386) time: 0.4684 data: 0.0035 max mem: 22446 +train: [11] Total time: 0:03:11 (0.4785 s / it) +train: [11] Summary: lr: 0.000166 loss: 0.3944 (0.4074) grad: 0.1247 (0.1386) +eval (validation): [11] [ 0/63] eta: 0:03:30 time: 3.3467 data: 3.0603 max mem: 22446 +eval (validation): [11] [20/63] eta: 0:00:22 time: 0.3836 data: 0.0171 max mem: 22446 +eval (validation): [11] [40/63] eta: 0:00:10 time: 0.3676 data: 0.0032 max mem: 22446 +eval (validation): [11] [60/63] eta: 0:00:01 time: 0.3353 data: 0.0035 max mem: 22446 +eval (validation): [11] [62/63] eta: 0:00:00 time: 0.3381 data: 0.0035 max mem: 22446 +eval (validation): [11] Total time: 0:00:26 (0.4145 s / it) +cv: [11] best hparam: (16, 1.0) (041) ('041_lr1.6e+01_wd1.0e+00') loss: 0.380 acc: 0.957 f1: 0.950 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [12] [ 0/400] eta: 0:22:28 lr: nan time: 3.3710 data: 3.0372 max mem: 22446 +train: [12] [ 20/400] eta: 0:03:46 lr: 0.000164 loss: 0.3691 (0.3722) grad: 0.1366 (0.1328) time: 0.4578 data: 0.0033 max mem: 22446 +train: [12] [ 40/400] eta: 0:03:08 lr: 0.000163 loss: 0.3753 (0.3822) grad: 0.1259 (0.1276) time: 0.4469 data: 0.0031 max mem: 22446 +train: [12] [ 60/400] eta: 0:02:52 lr: 0.000161 loss: 0.3827 (0.3822) grad: 0.1204 (0.1267) time: 0.4708 data: 0.0035 max mem: 22446 +train: [12] [ 80/400] eta: 0:02:38 lr: 0.000160 loss: 0.3769 (0.3834) grad: 0.1211 (0.1253) time: 0.4559 data: 0.0034 max mem: 22446 +train: [12] [100/400] eta: 0:02:26 lr: 0.000158 loss: 0.3813 (0.3882) grad: 0.1171 (0.1232) time: 0.4631 data: 0.0034 max mem: 22446 +train: [12] [120/400] eta: 0:02:14 lr: 0.000156 loss: 0.3957 (0.3895) grad: 0.1171 (0.1228) time: 0.4510 data: 0.0033 max mem: 22446 +train: [12] [140/400] eta: 0:02:04 lr: 0.000155 loss: 0.3895 (0.3895) grad: 0.1126 (0.1216) time: 0.4532 data: 0.0035 max mem: 22446 +train: [12] [160/400] eta: 0:01:54 lr: 0.000153 loss: 0.3922 (0.3894) grad: 0.1126 (0.1212) time: 0.4631 data: 0.0034 max mem: 22446 +train: [12] [180/400] eta: 0:01:44 lr: 0.000152 loss: 0.3922 (0.3895) grad: 0.1180 (0.1210) time: 0.4795 data: 0.0035 max mem: 22446 +train: [12] [200/400] eta: 0:01:34 lr: 0.000150 loss: 0.3713 (0.3887) grad: 0.1170 (0.1210) time: 0.4528 data: 0.0034 max mem: 22446 +train: [12] [220/400] eta: 0:01:25 lr: 0.000149 loss: 0.3700 (0.3883) grad: 0.1191 (0.1211) time: 0.4616 data: 0.0034 max mem: 22446 +train: [12] [240/400] eta: 0:01:15 lr: 0.000147 loss: 0.3954 (0.3897) grad: 0.1210 (0.1217) time: 0.4767 data: 0.0037 max mem: 22446 +train: [12] [260/400] eta: 0:01:06 lr: 0.000145 loss: 0.4010 (0.3894) grad: 0.1137 (0.1209) time: 0.4633 data: 0.0036 max mem: 22446 +train: [12] [280/400] eta: 0:00:56 lr: 0.000144 loss: 0.3689 (0.3885) grad: 0.1122 (0.1202) time: 0.4611 data: 0.0037 max mem: 22446 +train: [12] [300/400] eta: 0:00:48 lr: 0.000142 loss: 0.3815 (0.3891) grad: 0.1134 (0.1204) time: 0.6258 data: 0.1834 max mem: 22446 +train: [12] [320/400] eta: 0:00:38 lr: 0.000141 loss: 0.3953 (0.3890) grad: 0.1168 (0.1199) time: 0.4422 data: 0.0033 max mem: 22446 +train: [12] [340/400] eta: 0:00:28 lr: 0.000139 loss: 0.3979 (0.3899) grad: 0.1148 (0.1196) time: 0.4490 data: 0.0031 max mem: 22446 +train: [12] [360/400] eta: 0:00:19 lr: 0.000138 loss: 0.3954 (0.3895) grad: 0.1148 (0.1195) time: 0.4551 data: 0.0034 max mem: 22446 +train: [12] [380/400] eta: 0:00:09 lr: 0.000136 loss: 0.3891 (0.3900) grad: 0.1149 (0.1197) time: 0.4579 data: 0.0034 max mem: 22446 +train: [12] [399/400] eta: 0:00:00 lr: 0.000134 loss: 0.3749 (0.3892) grad: 0.1183 (0.1197) time: 0.4548 data: 0.0035 max mem: 22446 +train: [12] Total time: 0:03:09 (0.4747 s / it) +train: [12] Summary: lr: 0.000134 loss: 0.3749 (0.3892) grad: 0.1183 (0.1197) +eval (validation): [12] [ 0/63] eta: 0:03:32 time: 3.3688 data: 3.0841 max mem: 22446 +eval (validation): [12] [20/63] eta: 0:00:22 time: 0.3732 data: 0.0045 max mem: 22446 +eval (validation): [12] [40/63] eta: 0:00:10 time: 0.3690 data: 0.0031 max mem: 22446 +eval (validation): [12] [60/63] eta: 0:00:01 time: 0.3306 data: 0.0033 max mem: 22446 +eval (validation): [12] [62/63] eta: 0:00:00 time: 0.3307 data: 0.0032 max mem: 22446 +eval (validation): [12] Total time: 0:00:25 (0.4089 s / it) +cv: [12] best hparam: (26, 1.0) (044) ('044_lr2.6e+01_wd1.0e+00') loss: 0.409 acc: 0.960 f1: 0.953 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [13] [ 0/400] eta: 0:22:48 lr: nan time: 3.4205 data: 3.0727 max mem: 22446 +train: [13] [ 20/400] eta: 0:03:48 lr: 0.000133 loss: 0.3647 (0.3822) grad: 0.1045 (0.1066) time: 0.4590 data: 0.0031 max mem: 22446 +train: [13] [ 40/400] eta: 0:03:09 lr: 0.000131 loss: 0.3647 (0.3694) grad: 0.1040 (0.1072) time: 0.4492 data: 0.0036 max mem: 22446 +train: [13] [ 60/400] eta: 0:02:54 lr: 0.000130 loss: 0.3637 (0.3699) grad: 0.1095 (0.1123) time: 0.4819 data: 0.0028 max mem: 22446 +train: [13] [ 80/400] eta: 0:02:40 lr: 0.000128 loss: 0.3591 (0.3713) grad: 0.1092 (0.1101) time: 0.4704 data: 0.0034 max mem: 22446 +train: [13] [100/400] eta: 0:02:29 lr: 0.000127 loss: 0.3848 (0.3742) grad: 0.1043 (0.1104) time: 0.4887 data: 0.0037 max mem: 22446 +train: [13] [120/400] eta: 0:02:17 lr: 0.000125 loss: 0.3774 (0.3721) grad: 0.1077 (0.1103) time: 0.4541 data: 0.0033 max mem: 22446 +train: [13] [140/400] eta: 0:02:06 lr: 0.000124 loss: 0.3528 (0.3730) grad: 0.1073 (0.1106) time: 0.4685 data: 0.0034 max mem: 22446 +train: [13] [160/400] eta: 0:01:56 lr: 0.000122 loss: 0.3528 (0.3735) grad: 0.1074 (0.1105) time: 0.4693 data: 0.0034 max mem: 22446 +train: [13] [180/400] eta: 0:01:46 lr: 0.000120 loss: 0.3736 (0.3763) grad: 0.1075 (0.1109) time: 0.4586 data: 0.0032 max mem: 22446 +train: [13] [200/400] eta: 0:01:36 lr: 0.000119 loss: 0.3814 (0.3768) grad: 0.1157 (0.1116) time: 0.4666 data: 0.0035 max mem: 22446 +train: [13] [220/400] eta: 0:01:26 lr: 0.000117 loss: 0.3727 (0.3760) grad: 0.1137 (0.1121) time: 0.4635 data: 0.0034 max mem: 22446 +train: [13] [240/400] eta: 0:01:16 lr: 0.000116 loss: 0.3727 (0.3765) grad: 0.1113 (0.1123) time: 0.4708 data: 0.0035 max mem: 22446 +train: [13] [260/400] eta: 0:01:06 lr: 0.000114 loss: 0.3669 (0.3763) grad: 0.1128 (0.1125) time: 0.4511 data: 0.0034 max mem: 22446 +train: [13] [280/400] eta: 0:00:57 lr: 0.000113 loss: 0.3669 (0.3755) grad: 0.1103 (0.1127) time: 0.4542 data: 0.0034 max mem: 22446 +train: [13] [300/400] eta: 0:00:48 lr: 0.000111 loss: 0.3669 (0.3763) grad: 0.1108 (0.1131) time: 0.6372 data: 0.1799 max mem: 22446 +train: [13] [320/400] eta: 0:00:38 lr: 0.000110 loss: 0.3669 (0.3753) grad: 0.1126 (0.1128) time: 0.4518 data: 0.0029 max mem: 22446 +train: [13] [340/400] eta: 0:00:28 lr: 0.000108 loss: 0.3660 (0.3743) grad: 0.1084 (0.1127) time: 0.4543 data: 0.0033 max mem: 22446 +train: [13] [360/400] eta: 0:00:19 lr: 0.000107 loss: 0.3298 (0.3728) grad: 0.1090 (0.1124) time: 0.4547 data: 0.0033 max mem: 22446 +train: [13] [380/400] eta: 0:00:09 lr: 0.000105 loss: 0.3592 (0.3731) grad: 0.1087 (0.1123) time: 0.4539 data: 0.0034 max mem: 22446 +train: [13] [399/400] eta: 0:00:00 lr: 0.000104 loss: 0.3762 (0.3727) grad: 0.1051 (0.1120) time: 0.4637 data: 0.0035 max mem: 22446 +train: [13] Total time: 0:03:11 (0.4788 s / it) +train: [13] Summary: lr: 0.000104 loss: 0.3762 (0.3727) grad: 0.1051 (0.1120) +eval (validation): [13] [ 0/63] eta: 0:03:30 time: 3.3370 data: 3.0355 max mem: 22446 +eval (validation): [13] [20/63] eta: 0:00:21 time: 0.3568 data: 0.0037 max mem: 22446 +eval (validation): [13] [40/63] eta: 0:00:09 time: 0.3380 data: 0.0034 max mem: 22446 +eval (validation): [13] [60/63] eta: 0:00:01 time: 0.3298 data: 0.0032 max mem: 22446 +eval (validation): [13] [62/63] eta: 0:00:00 time: 0.3299 data: 0.0032 max mem: 22446 +eval (validation): [13] Total time: 0:00:24 (0.3934 s / it) +cv: [13] best hparam: (14, 1.0) (040) ('040_lr1.4e+01_wd1.0e+00') loss: 0.299 acc: 0.962 f1: 0.955 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [14] [ 0/400] eta: 0:23:08 lr: nan time: 3.4702 data: 3.1314 max mem: 22446 +train: [14] [ 20/400] eta: 0:03:44 lr: 0.000102 loss: 0.3753 (0.3688) grad: 0.0948 (0.1011) time: 0.4477 data: 0.0027 max mem: 22446 +train: [14] [ 40/400] eta: 0:03:08 lr: 0.000101 loss: 0.3656 (0.3649) grad: 0.0948 (0.0994) time: 0.4505 data: 0.0034 max mem: 22446 +train: [14] [ 60/400] eta: 0:02:49 lr: 0.000099 loss: 0.3433 (0.3578) grad: 0.0985 (0.1001) time: 0.4496 data: 0.0036 max mem: 22446 +train: [14] [ 80/400] eta: 0:02:37 lr: 0.000098 loss: 0.3431 (0.3561) grad: 0.1028 (0.1030) time: 0.4658 data: 0.0036 max mem: 22446 +train: [14] [100/400] eta: 0:02:24 lr: 0.000096 loss: 0.3570 (0.3589) grad: 0.1077 (0.1043) time: 0.4537 data: 0.0035 max mem: 22446 +train: [14] [120/400] eta: 0:02:14 lr: 0.000095 loss: 0.3745 (0.3641) grad: 0.1108 (0.1056) time: 0.4555 data: 0.0035 max mem: 22446 +train: [14] [140/400] eta: 0:02:03 lr: 0.000093 loss: 0.3945 (0.3695) grad: 0.1142 (0.1074) time: 0.4484 data: 0.0034 max mem: 22446 +train: [14] [160/400] eta: 0:01:53 lr: 0.000092 loss: 0.3713 (0.3685) grad: 0.1037 (0.1071) time: 0.4603 data: 0.0034 max mem: 22446 +train: [14] [180/400] eta: 0:01:43 lr: 0.000090 loss: 0.3550 (0.3677) grad: 0.1016 (0.1068) time: 0.4573 data: 0.0034 max mem: 22446 +train: [14] [200/400] eta: 0:01:33 lr: 0.000089 loss: 0.3622 (0.3673) grad: 0.1037 (0.1076) time: 0.4496 data: 0.0034 max mem: 22446 +train: [14] [220/400] eta: 0:01:24 lr: 0.000088 loss: 0.3603 (0.3674) grad: 0.1029 (0.1072) time: 0.4540 data: 0.0035 max mem: 22446 +train: [14] [240/400] eta: 0:01:14 lr: 0.000086 loss: 0.3450 (0.3654) grad: 0.1029 (0.1073) time: 0.4604 data: 0.0035 max mem: 22446 +train: [14] [260/400] eta: 0:01:05 lr: 0.000085 loss: 0.3445 (0.3647) grad: 0.1058 (0.1072) time: 0.4537 data: 0.0034 max mem: 22446 +train: [14] [280/400] eta: 0:00:55 lr: 0.000083 loss: 0.3700 (0.3663) grad: 0.1077 (0.1074) time: 0.4505 data: 0.0036 max mem: 22446 +train: [14] [300/400] eta: 0:00:47 lr: 0.000082 loss: 0.3674 (0.3658) grad: 0.1061 (0.1074) time: 0.6143 data: 0.1786 max mem: 22446 +train: [14] [320/400] eta: 0:00:37 lr: 0.000081 loss: 0.3610 (0.3652) grad: 0.1004 (0.1068) time: 0.4679 data: 0.0058 max mem: 22446 +train: [14] [340/400] eta: 0:00:28 lr: 0.000079 loss: 0.3711 (0.3662) grad: 0.0990 (0.1066) time: 0.4756 data: 0.0032 max mem: 22446 +train: [14] [360/400] eta: 0:00:18 lr: 0.000078 loss: 0.3668 (0.3660) grad: 0.1049 (0.1067) time: 0.4654 data: 0.0036 max mem: 22446 +train: [14] [380/400] eta: 0:00:09 lr: 0.000076 loss: 0.3554 (0.3659) grad: 0.1057 (0.1067) time: 0.4641 data: 0.0036 max mem: 22446 +train: [14] [399/400] eta: 0:00:00 lr: 0.000075 loss: 0.3585 (0.3658) grad: 0.1049 (0.1067) time: 0.4565 data: 0.0035 max mem: 22446 +train: [14] Total time: 0:03:09 (0.4729 s / it) +train: [14] Summary: lr: 0.000075 loss: 0.3585 (0.3658) grad: 0.1049 (0.1067) +eval (validation): [14] [ 0/63] eta: 0:03:28 time: 3.3106 data: 3.0790 max mem: 22446 +eval (validation): [14] [20/63] eta: 0:00:22 time: 0.3719 data: 0.0193 max mem: 22446 +eval (validation): [14] [40/63] eta: 0:00:09 time: 0.3466 data: 0.0031 max mem: 22446 +eval (validation): [14] [60/63] eta: 0:00:01 time: 0.3258 data: 0.0033 max mem: 22446 +eval (validation): [14] [62/63] eta: 0:00:00 time: 0.3269 data: 0.0032 max mem: 22446 +eval (validation): [14] Total time: 0:00:25 (0.3997 s / it) +cv: [14] best hparam: (22, 1.0) (043) ('043_lr2.2e+01_wd1.0e+00') loss: 0.311 acc: 0.963 f1: 0.955 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [15] [ 0/400] eta: 0:22:39 lr: nan time: 3.3998 data: 3.0107 max mem: 22446 +train: [15] [ 20/400] eta: 0:03:46 lr: 0.000074 loss: 0.3739 (0.3731) grad: 0.0973 (0.1006) time: 0.4568 data: 0.0038 max mem: 22446 +train: [15] [ 40/400] eta: 0:03:10 lr: 0.000072 loss: 0.3739 (0.3733) grad: 0.0993 (0.1045) time: 0.4574 data: 0.0034 max mem: 22446 +train: [15] [ 60/400] eta: 0:02:52 lr: 0.000071 loss: 0.3642 (0.3701) grad: 0.0999 (0.1033) time: 0.4597 data: 0.0036 max mem: 22446 +train: [15] [ 80/400] eta: 0:02:38 lr: 0.000070 loss: 0.3545 (0.3664) grad: 0.0997 (0.1027) time: 0.4561 data: 0.0035 max mem: 22446 +train: [15] [100/400] eta: 0:02:25 lr: 0.000068 loss: 0.3452 (0.3619) grad: 0.1002 (0.1026) time: 0.4480 data: 0.0036 max mem: 22446 +train: [15] [120/400] eta: 0:02:14 lr: 0.000067 loss: 0.3361 (0.3595) grad: 0.1002 (0.1034) time: 0.4537 data: 0.0035 max mem: 22446 +train: [15] [140/400] eta: 0:02:03 lr: 0.000066 loss: 0.3435 (0.3601) grad: 0.1053 (0.1043) time: 0.4552 data: 0.0034 max mem: 22446 +train: [15] [160/400] eta: 0:01:53 lr: 0.000064 loss: 0.3528 (0.3579) grad: 0.1067 (0.1050) time: 0.4539 data: 0.0034 max mem: 22446 +train: [15] [180/400] eta: 0:01:43 lr: 0.000063 loss: 0.3503 (0.3578) grad: 0.1067 (0.1054) time: 0.4542 data: 0.0036 max mem: 22446 +train: [15] [200/400] eta: 0:01:33 lr: 0.000062 loss: 0.3439 (0.3560) grad: 0.1021 (0.1048) time: 0.4500 data: 0.0035 max mem: 22446 +train: [15] [220/400] eta: 0:01:24 lr: 0.000061 loss: 0.3439 (0.3557) grad: 0.1024 (0.1050) time: 0.4545 data: 0.0035 max mem: 22446 +train: [15] [240/400] eta: 0:01:14 lr: 0.000059 loss: 0.3467 (0.3548) grad: 0.1048 (0.1049) time: 0.4572 data: 0.0034 max mem: 22446 +train: [15] [260/400] eta: 0:01:05 lr: 0.000058 loss: 0.3446 (0.3548) grad: 0.1039 (0.1045) time: 0.4573 data: 0.0034 max mem: 22446 +train: [15] [280/400] eta: 0:00:55 lr: 0.000057 loss: 0.3557 (0.3556) grad: 0.1020 (0.1046) time: 0.4566 data: 0.0034 max mem: 22446 +train: [15] [300/400] eta: 0:00:47 lr: 0.000056 loss: 0.3454 (0.3562) grad: 0.1004 (0.1047) time: 0.6178 data: 0.1798 max mem: 22446 +train: [15] [320/400] eta: 0:00:37 lr: 0.000054 loss: 0.3422 (0.3562) grad: 0.1020 (0.1048) time: 0.4474 data: 0.0030 max mem: 22446 +train: [15] [340/400] eta: 0:00:28 lr: 0.000053 loss: 0.3509 (0.3566) grad: 0.1100 (0.1049) time: 0.4601 data: 0.0034 max mem: 22446 +train: [15] [360/400] eta: 0:00:18 lr: 0.000052 loss: 0.3510 (0.3558) grad: 0.1021 (0.1047) time: 0.4615 data: 0.0035 max mem: 22446 +train: [15] [380/400] eta: 0:00:09 lr: 0.000051 loss: 0.3504 (0.3554) grad: 0.1027 (0.1045) time: 0.4552 data: 0.0035 max mem: 22446 +train: [15] [399/400] eta: 0:00:00 lr: 0.000050 loss: 0.3561 (0.3569) grad: 0.1027 (0.1046) time: 0.4460 data: 0.0035 max mem: 22446 +train: [15] Total time: 0:03:08 (0.4706 s / it) +train: [15] Summary: lr: 0.000050 loss: 0.3561 (0.3569) grad: 0.1027 (0.1046) +eval (validation): [15] [ 0/63] eta: 0:03:22 time: 3.2187 data: 2.9736 max mem: 22446 +eval (validation): [15] [20/63] eta: 0:00:20 time: 0.3499 data: 0.0069 max mem: 22446 +eval (validation): [15] [40/63] eta: 0:00:09 time: 0.3338 data: 0.0034 max mem: 22446 +eval (validation): [15] [60/63] eta: 0:00:01 time: 0.3446 data: 0.0030 max mem: 22446 +eval (validation): [15] [62/63] eta: 0:00:00 time: 0.3422 data: 0.0033 max mem: 22446 +eval (validation): [15] Total time: 0:00:24 (0.3937 s / it) +cv: [15] best hparam: (14, 1.0) (040) ('040_lr1.4e+01_wd1.0e+00') loss: 0.278 acc: 0.962 f1: 0.955 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [16] [ 0/400] eta: 0:22:15 lr: nan time: 3.3383 data: 3.0007 max mem: 22446 +train: [16] [ 20/400] eta: 0:03:40 lr: 0.000048 loss: 0.3383 (0.3520) grad: 0.0948 (0.1009) time: 0.4430 data: 0.0034 max mem: 22446 +train: [16] [ 40/400] eta: 0:03:04 lr: 0.000047 loss: 0.3596 (0.3556) grad: 0.0996 (0.1039) time: 0.4433 data: 0.0031 max mem: 22446 +train: [16] [ 60/400] eta: 0:02:48 lr: 0.000046 loss: 0.3549 (0.3524) grad: 0.1045 (0.1038) time: 0.4549 data: 0.0034 max mem: 22446 +train: [16] [ 80/400] eta: 0:02:35 lr: 0.000045 loss: 0.3549 (0.3544) grad: 0.1045 (0.1036) time: 0.4627 data: 0.0035 max mem: 22446 +train: [16] [100/400] eta: 0:02:23 lr: 0.000044 loss: 0.3604 (0.3576) grad: 0.1032 (0.1035) time: 0.4470 data: 0.0036 max mem: 22446 +train: [16] [120/400] eta: 0:02:12 lr: 0.000043 loss: 0.3547 (0.3576) grad: 0.0958 (0.1037) time: 0.4543 data: 0.0035 max mem: 22446 +train: [16] [140/400] eta: 0:02:02 lr: 0.000042 loss: 0.3547 (0.3574) grad: 0.0995 (0.1041) time: 0.4551 data: 0.0034 max mem: 22446 +train: [16] [160/400] eta: 0:01:52 lr: 0.000041 loss: 0.3498 (0.3547) grad: 0.1001 (0.1037) time: 0.4559 data: 0.0035 max mem: 22446 +train: [16] [180/400] eta: 0:01:43 lr: 0.000040 loss: 0.3482 (0.3551) grad: 0.0967 (0.1034) time: 0.4659 data: 0.0036 max mem: 22446 +train: [16] [200/400] eta: 0:01:33 lr: 0.000039 loss: 0.3482 (0.3555) grad: 0.1013 (0.1038) time: 0.4579 data: 0.0035 max mem: 22446 +train: [16] [220/400] eta: 0:01:24 lr: 0.000038 loss: 0.3454 (0.3541) grad: 0.0992 (0.1030) time: 0.4592 data: 0.0034 max mem: 22446 +train: [16] [240/400] eta: 0:01:14 lr: 0.000036 loss: 0.3489 (0.3544) grad: 0.0975 (0.1029) time: 0.4703 data: 0.0034 max mem: 22446 +train: [16] [260/400] eta: 0:01:05 lr: 0.000035 loss: 0.3597 (0.3559) grad: 0.0997 (0.1031) time: 0.4538 data: 0.0034 max mem: 22446 +train: [16] [280/400] eta: 0:00:55 lr: 0.000034 loss: 0.3663 (0.3565) grad: 0.1067 (0.1037) time: 0.4490 data: 0.0036 max mem: 22446 +train: [16] [300/400] eta: 0:00:47 lr: 0.000033 loss: 0.3663 (0.3573) grad: 0.1076 (0.1042) time: 0.6126 data: 0.1789 max mem: 22446 +train: [16] [320/400] eta: 0:00:37 lr: 0.000032 loss: 0.3483 (0.3572) grad: 0.1076 (0.1045) time: 0.4467 data: 0.0032 max mem: 22446 +train: [16] [340/400] eta: 0:00:28 lr: 0.000031 loss: 0.3516 (0.3580) grad: 0.1092 (0.1048) time: 0.4619 data: 0.0035 max mem: 22446 +train: [16] [360/400] eta: 0:00:18 lr: 0.000031 loss: 0.3519 (0.3580) grad: 0.1090 (0.1048) time: 0.4697 data: 0.0035 max mem: 22446 +train: [16] [380/400] eta: 0:00:09 lr: 0.000030 loss: 0.3424 (0.3563) grad: 0.0987 (0.1046) time: 0.4637 data: 0.0036 max mem: 22446 +train: [16] [399/400] eta: 0:00:00 lr: 0.000029 loss: 0.3253 (0.3566) grad: 0.1031 (0.1050) time: 0.4584 data: 0.0036 max mem: 22446 +train: [16] Total time: 0:03:08 (0.4717 s / it) +train: [16] Summary: lr: 0.000029 loss: 0.3253 (0.3566) grad: 0.1031 (0.1050) +eval (validation): [16] [ 0/63] eta: 0:03:27 time: 3.2881 data: 2.9983 max mem: 22446 +eval (validation): [16] [20/63] eta: 0:00:21 time: 0.3539 data: 0.0035 max mem: 22446 +eval (validation): [16] [40/63] eta: 0:00:09 time: 0.3445 data: 0.0028 max mem: 22446 +eval (validation): [16] [60/63] eta: 0:00:01 time: 0.3333 data: 0.0035 max mem: 22446 +eval (validation): [16] [62/63] eta: 0:00:00 time: 0.3356 data: 0.0034 max mem: 22446 +eval (validation): [16] Total time: 0:00:24 (0.3953 s / it) +cv: [16] best hparam: (22, 1.0) (043) ('043_lr2.2e+01_wd1.0e+00') loss: 0.284 acc: 0.963 f1: 0.956 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [17] [ 0/400] eta: 0:22:21 lr: nan time: 3.3526 data: 3.0109 max mem: 22446 +train: [17] [ 20/400] eta: 0:03:41 lr: 0.000028 loss: 0.3398 (0.3491) grad: 0.1012 (0.1026) time: 0.4445 data: 0.0038 max mem: 22446 +train: [17] [ 40/400] eta: 0:03:06 lr: 0.000027 loss: 0.3424 (0.3567) grad: 0.1028 (0.1056) time: 0.4510 data: 0.0031 max mem: 22446 +train: [17] [ 60/400] eta: 0:02:50 lr: 0.000026 loss: 0.3591 (0.3570) grad: 0.0961 (0.1018) time: 0.4708 data: 0.0036 max mem: 22446 +train: [17] [ 80/400] eta: 0:02:35 lr: 0.000025 loss: 0.3591 (0.3595) grad: 0.0969 (0.1023) time: 0.4391 data: 0.0037 max mem: 22446 +train: [17] [100/400] eta: 0:02:23 lr: 0.000024 loss: 0.3513 (0.3564) grad: 0.1025 (0.1024) time: 0.4471 data: 0.0035 max mem: 22446 +train: [17] [120/400] eta: 0:02:13 lr: 0.000023 loss: 0.3319 (0.3527) grad: 0.0972 (0.1012) time: 0.4561 data: 0.0034 max mem: 22446 +train: [17] [140/400] eta: 0:02:02 lr: 0.000023 loss: 0.3319 (0.3506) grad: 0.0961 (0.1012) time: 0.4450 data: 0.0035 max mem: 22446 +train: [17] [160/400] eta: 0:01:52 lr: 0.000022 loss: 0.3496 (0.3510) grad: 0.0977 (0.1018) time: 0.4552 data: 0.0034 max mem: 22446 +train: [17] [180/400] eta: 0:01:42 lr: 0.000021 loss: 0.3535 (0.3519) grad: 0.0989 (0.1017) time: 0.4578 data: 0.0035 max mem: 22446 +train: [17] [200/400] eta: 0:01:33 lr: 0.000020 loss: 0.3612 (0.3522) grad: 0.0999 (0.1020) time: 0.4486 data: 0.0035 max mem: 22446 +train: [17] [220/400] eta: 0:01:23 lr: 0.000019 loss: 0.3576 (0.3526) grad: 0.0999 (0.1020) time: 0.4596 data: 0.0033 max mem: 22446 +train: [17] [240/400] eta: 0:01:14 lr: 0.000019 loss: 0.3559 (0.3522) grad: 0.0986 (0.1022) time: 0.4479 data: 0.0035 max mem: 22446 +train: [17] [260/400] eta: 0:01:04 lr: 0.000018 loss: 0.3533 (0.3525) grad: 0.1024 (0.1020) time: 0.4568 data: 0.0033 max mem: 22446 +train: [17] [280/400] eta: 0:00:55 lr: 0.000017 loss: 0.3463 (0.3513) grad: 0.1012 (0.1018) time: 0.4576 data: 0.0035 max mem: 22446 +train: [17] [300/400] eta: 0:00:47 lr: 0.000016 loss: 0.3419 (0.3514) grad: 0.1032 (0.1023) time: 0.6189 data: 0.1783 max mem: 22446 +train: [17] [320/400] eta: 0:00:37 lr: 0.000016 loss: 0.3546 (0.3520) grad: 0.1034 (0.1024) time: 0.4516 data: 0.0028 max mem: 22446 +train: [17] [340/400] eta: 0:00:28 lr: 0.000015 loss: 0.3420 (0.3517) grad: 0.0964 (0.1020) time: 0.4511 data: 0.0035 max mem: 22446 +train: [17] [360/400] eta: 0:00:18 lr: 0.000014 loss: 0.3364 (0.3505) grad: 0.0928 (0.1015) time: 0.4574 data: 0.0034 max mem: 22446 +train: [17] [380/400] eta: 0:00:09 lr: 0.000014 loss: 0.3476 (0.3514) grad: 0.0977 (0.1017) time: 0.4525 data: 0.0035 max mem: 22446 +train: [17] [399/400] eta: 0:00:00 lr: 0.000013 loss: 0.3621 (0.3517) grad: 0.1047 (0.1017) time: 0.4577 data: 0.0036 max mem: 22446 +train: [17] Total time: 0:03:07 (0.4689 s / it) +train: [17] Summary: lr: 0.000013 loss: 0.3621 (0.3517) grad: 0.1047 (0.1017) +eval (validation): [17] [ 0/63] eta: 0:03:17 time: 3.1420 data: 2.9136 max mem: 22446 +eval (validation): [17] [20/63] eta: 0:00:21 time: 0.3606 data: 0.0038 max mem: 22446 +eval (validation): [17] [40/63] eta: 0:00:09 time: 0.3531 data: 0.0029 max mem: 22446 +eval (validation): [17] [60/63] eta: 0:00:01 time: 0.3539 data: 0.0035 max mem: 22446 +eval (validation): [17] [62/63] eta: 0:00:00 time: 0.3516 data: 0.0035 max mem: 22446 +eval (validation): [17] Total time: 0:00:25 (0.4044 s / it) +cv: [17] best hparam: (22, 1.0) (043) ('043_lr2.2e+01_wd1.0e+00') loss: 0.280 acc: 0.964 f1: 0.957 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [18] [ 0/400] eta: 0:23:00 lr: nan time: 3.4519 data: 3.0687 max mem: 22446 +train: [18] [ 20/400] eta: 0:03:51 lr: 0.000012 loss: 0.3475 (0.3582) grad: 0.0995 (0.1021) time: 0.4658 data: 0.0036 max mem: 22446 +train: [18] [ 40/400] eta: 0:03:08 lr: 0.000012 loss: 0.3408 (0.3537) grad: 0.0996 (0.0998) time: 0.4378 data: 0.0028 max mem: 22446 +train: [18] [ 60/400] eta: 0:02:51 lr: 0.000011 loss: 0.3346 (0.3503) grad: 0.0996 (0.0998) time: 0.4628 data: 0.0036 max mem: 22446 +train: [18] [ 80/400] eta: 0:02:36 lr: 0.000011 loss: 0.3344 (0.3473) grad: 0.1005 (0.1006) time: 0.4480 data: 0.0035 max mem: 22446 +train: [18] [100/400] eta: 0:02:24 lr: 0.000010 loss: 0.3430 (0.3544) grad: 0.1075 (0.1031) time: 0.4418 data: 0.0036 max mem: 22446 +train: [18] [120/400] eta: 0:02:13 lr: 0.000009 loss: 0.3665 (0.3543) grad: 0.1075 (0.1033) time: 0.4483 data: 0.0035 max mem: 22446 +train: [18] [140/400] eta: 0:02:02 lr: 0.000009 loss: 0.3590 (0.3540) grad: 0.1024 (0.1027) time: 0.4484 data: 0.0036 max mem: 22446 +train: [18] [160/400] eta: 0:01:52 lr: 0.000008 loss: 0.3439 (0.3529) grad: 0.0963 (0.1026) time: 0.4574 data: 0.0035 max mem: 22446 +train: [18] [180/400] eta: 0:01:43 lr: 0.000008 loss: 0.3456 (0.3528) grad: 0.1038 (0.1034) time: 0.4600 data: 0.0035 max mem: 22446 +train: [18] [200/400] eta: 0:01:33 lr: 0.000007 loss: 0.3481 (0.3518) grad: 0.1093 (0.1038) time: 0.4511 data: 0.0035 max mem: 22446 +train: [18] [220/400] eta: 0:01:23 lr: 0.000007 loss: 0.3358 (0.3509) grad: 0.1012 (0.1033) time: 0.4550 data: 0.0033 max mem: 22446 +train: [18] [240/400] eta: 0:01:14 lr: 0.000006 loss: 0.3424 (0.3499) grad: 0.0974 (0.1031) time: 0.4610 data: 0.0034 max mem: 22446 +train: [18] [260/400] eta: 0:01:05 lr: 0.000006 loss: 0.3431 (0.3509) grad: 0.0996 (0.1032) time: 0.4560 data: 0.0035 max mem: 22446 +train: [18] [280/400] eta: 0:00:55 lr: 0.000006 loss: 0.3452 (0.3504) grad: 0.1016 (0.1032) time: 0.4561 data: 0.0035 max mem: 22446 +train: [18] [300/400] eta: 0:00:47 lr: 0.000005 loss: 0.3452 (0.3505) grad: 0.1063 (0.1034) time: 0.6217 data: 0.1770 max mem: 22446 +train: [18] [320/400] eta: 0:00:37 lr: 0.000005 loss: 0.3635 (0.3505) grad: 0.1058 (0.1036) time: 0.4401 data: 0.0036 max mem: 22446 +train: [18] [340/400] eta: 0:00:28 lr: 0.000004 loss: 0.3461 (0.3509) grad: 0.1002 (0.1033) time: 0.4571 data: 0.0037 max mem: 22446 +train: [18] [360/400] eta: 0:00:18 lr: 0.000004 loss: 0.3485 (0.3506) grad: 0.1002 (0.1037) time: 0.4631 data: 0.0036 max mem: 22446 +train: [18] [380/400] eta: 0:00:09 lr: 0.000004 loss: 0.3419 (0.3498) grad: 0.1036 (0.1037) time: 0.4631 data: 0.0036 max mem: 22446 +train: [18] [399/400] eta: 0:00:00 lr: 0.000003 loss: 0.3333 (0.3489) grad: 0.1018 (0.1036) time: 0.4557 data: 0.0038 max mem: 22446 +train: [18] Total time: 0:03:08 (0.4703 s / it) +train: [18] Summary: lr: 0.000003 loss: 0.3333 (0.3489) grad: 0.1018 (0.1036) +eval (validation): [18] [ 0/63] eta: 0:03:23 time: 3.2346 data: 2.9790 max mem: 22446 +eval (validation): [18] [20/63] eta: 0:00:20 time: 0.3348 data: 0.0032 max mem: 22446 +eval (validation): [18] [40/63] eta: 0:00:09 time: 0.3471 data: 0.0031 max mem: 22446 +eval (validation): [18] [60/63] eta: 0:00:01 time: 0.3456 data: 0.0033 max mem: 22446 +eval (validation): [18] [62/63] eta: 0:00:00 time: 0.3441 data: 0.0033 max mem: 22446 +eval (validation): [18] Total time: 0:00:24 (0.3938 s / it) +cv: [18] best hparam: (22, 1.0) (043) ('043_lr2.2e+01_wd1.0e+00') loss: 0.278 acc: 0.963 f1: 0.956 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [19] [ 0/400] eta: 0:22:37 lr: nan time: 3.3943 data: 3.0111 max mem: 22446 +train: [19] [ 20/400] eta: 0:03:53 lr: 0.000003 loss: 0.3454 (0.3368) grad: 0.0937 (0.0960) time: 0.4756 data: 0.0039 max mem: 22446 +train: [19] [ 40/400] eta: 0:03:11 lr: 0.000003 loss: 0.3420 (0.3418) grad: 0.0968 (0.1008) time: 0.4435 data: 0.0034 max mem: 22446 +train: [19] [ 60/400] eta: 0:02:54 lr: 0.000002 loss: 0.3395 (0.3449) grad: 0.1001 (0.1010) time: 0.4752 data: 0.0037 max mem: 22446 +train: [19] [ 80/400] eta: 0:02:39 lr: 0.000002 loss: 0.3404 (0.3438) grad: 0.1001 (0.1001) time: 0.4529 data: 0.0035 max mem: 22446 +train: [19] [100/400] eta: 0:02:26 lr: 0.000002 loss: 0.3422 (0.3470) grad: 0.0989 (0.1001) time: 0.4462 data: 0.0036 max mem: 22446 +train: [19] [120/400] eta: 0:02:14 lr: 0.000002 loss: 0.3421 (0.3475) grad: 0.0960 (0.0994) time: 0.4526 data: 0.0035 max mem: 22446 +train: [19] [140/400] eta: 0:02:03 lr: 0.000001 loss: 0.3265 (0.3455) grad: 0.0947 (0.0991) time: 0.4419 data: 0.0036 max mem: 22446 +train: [19] [160/400] eta: 0:01:53 lr: 0.000001 loss: 0.3353 (0.3463) grad: 0.1000 (0.0995) time: 0.4411 data: 0.0036 max mem: 22446 +train: [19] [180/400] eta: 0:01:43 lr: 0.000001 loss: 0.3505 (0.3465) grad: 0.1011 (0.0992) time: 0.4611 data: 0.0035 max mem: 22446 +train: [19] [200/400] eta: 0:01:33 lr: 0.000001 loss: 0.3336 (0.3456) grad: 0.0956 (0.0994) time: 0.4540 data: 0.0036 max mem: 22446 +train: [19] [220/400] eta: 0:01:24 lr: 0.000001 loss: 0.3500 (0.3469) grad: 0.0998 (0.1003) time: 0.4499 data: 0.0034 max mem: 22446 +train: [19] [240/400] eta: 0:01:14 lr: 0.000001 loss: 0.3429 (0.3465) grad: 0.0998 (0.1000) time: 0.4662 data: 0.0035 max mem: 22446 +train: [19] [260/400] eta: 0:01:05 lr: 0.000000 loss: 0.3400 (0.3470) grad: 0.1042 (0.1005) time: 0.4573 data: 0.0034 max mem: 22446 +train: [19] [280/400] eta: 0:00:55 lr: 0.000000 loss: 0.3544 (0.3484) grad: 0.1054 (0.1007) time: 0.4612 data: 0.0035 max mem: 22446 +train: [19] [300/400] eta: 0:00:47 lr: 0.000000 loss: 0.3656 (0.3493) grad: 0.1002 (0.1007) time: 0.6555 data: 0.2067 max mem: 22446 +train: [19] [320/400] eta: 0:00:38 lr: 0.000000 loss: 0.3711 (0.3495) grad: 0.1002 (0.1011) time: 0.4343 data: 0.0026 max mem: 22446 +train: [19] [340/400] eta: 0:00:28 lr: 0.000000 loss: 0.3478 (0.3485) grad: 0.1009 (0.1009) time: 0.4485 data: 0.0036 max mem: 22446 +train: [19] [360/400] eta: 0:00:18 lr: 0.000000 loss: 0.3433 (0.3478) grad: 0.1009 (0.1013) time: 0.4581 data: 0.0036 max mem: 22446 +train: [19] [380/400] eta: 0:00:09 lr: 0.000000 loss: 0.3437 (0.3478) grad: 0.1016 (0.1014) time: 0.4615 data: 0.0037 max mem: 22446 +train: [19] [399/400] eta: 0:00:00 lr: 0.000000 loss: 0.3378 (0.3474) grad: 0.0996 (0.1012) time: 0.4536 data: 0.0035 max mem: 22446 +train: [19] Total time: 0:03:08 (0.4722 s / it) +train: [19] Summary: lr: 0.000000 loss: 0.3378 (0.3474) grad: 0.0996 (0.1012) +eval (validation): [19] [ 0/63] eta: 0:03:24 time: 3.2423 data: 3.0148 max mem: 22446 +eval (validation): [19] [20/63] eta: 0:00:20 time: 0.3360 data: 0.0044 max mem: 22446 +eval (validation): [19] [40/63] eta: 0:00:09 time: 0.3415 data: 0.0030 max mem: 22446 +eval (validation): [19] [60/63] eta: 0:00:01 time: 0.3516 data: 0.0033 max mem: 22446 +eval (validation): [19] [62/63] eta: 0:00:00 time: 0.3445 data: 0.0032 max mem: 22446 +eval (validation): [19] Total time: 0:00:24 (0.3939 s / it) +cv: [19] best hparam: (22, 1.0) (043) ('043_lr2.2e+01_wd1.0e+00') loss: 0.278 acc: 0.964 f1: 0.956 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +evaluating last checkpoint: experiments/data_scaling/output/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +eval model info: +{"score": 0.9635416666666666, "hparam": [22, 1.0], "hparam_id": 43, "epoch": 19, "is_best": false, "best_score": 0.9635416666666666} +eval (train): [20] [ 0/297] eta: 0:14:56 time: 3.0182 data: 2.7710 max mem: 22446 +eval (train): [20] [ 20/297] eta: 0:02:23 time: 0.3933 data: 0.0042 max mem: 22446 +eval (train): [20] [ 40/297] eta: 0:01:52 time: 0.3546 data: 0.0031 max mem: 22446 +eval (train): [20] [ 60/297] eta: 0:01:37 time: 0.3523 data: 0.0033 max mem: 22446 +eval (train): [20] [ 80/297] eta: 0:01:26 time: 0.3624 data: 0.0035 max mem: 22446 +eval (train): [20] [100/297] eta: 0:01:17 time: 0.3732 data: 0.0035 max mem: 22446 +eval (train): [20] [120/297] eta: 0:01:08 time: 0.3458 data: 0.0036 max mem: 22446 +eval (train): [20] [140/297] eta: 0:00:59 time: 0.3478 data: 0.0036 max mem: 22446 +eval (train): [20] [160/297] eta: 0:00:51 time: 0.3535 data: 0.0035 max mem: 22446 +eval (train): [20] [180/297] eta: 0:00:43 time: 0.3677 data: 0.0038 max mem: 22446 +eval (train): [20] [200/297] eta: 0:00:36 time: 0.3785 data: 0.0035 max mem: 22446 +eval (train): [20] [220/297] eta: 0:00:28 time: 0.3599 data: 0.0034 max mem: 22446 +eval (train): [20] [240/297] eta: 0:00:21 time: 0.3597 data: 0.0033 max mem: 22446 +eval (train): [20] [260/297] eta: 0:00:13 time: 0.3651 data: 0.0035 max mem: 22446 +eval (train): [20] [280/297] eta: 0:00:06 time: 0.3691 data: 0.0036 max mem: 22446 +eval (train): [20] [296/297] eta: 0:00:00 time: 0.3554 data: 0.0031 max mem: 22446 +eval (train): [20] Total time: 0:01:50 (0.3729 s / it) +eval (validation): [20] [ 0/63] eta: 0:03:14 time: 3.0798 data: 2.8475 max mem: 22446 +eval (validation): [20] [20/63] eta: 0:00:20 time: 0.3580 data: 0.0309 max mem: 22446 +eval (validation): [20] [40/63] eta: 0:00:09 time: 0.3517 data: 0.0058 max mem: 22446 +eval (validation): [20] [60/63] eta: 0:00:01 time: 0.3284 data: 0.0021 max mem: 22446 +eval (validation): [20] [62/63] eta: 0:00:00 time: 0.3255 data: 0.0024 max mem: 22446 +eval (validation): [20] Total time: 0:00:24 (0.3932 s / it) +eval (test): [20] [ 0/79] eta: 0:04:09 time: 3.1555 data: 2.8764 max mem: 22446 +eval (test): [20] [20/79] eta: 0:00:30 time: 0.3763 data: 0.0229 max mem: 22446 +eval (test): [20] [40/79] eta: 0:00:17 time: 0.3630 data: 0.0030 max mem: 22446 +eval (test): [20] [60/79] eta: 0:00:07 time: 0.3596 data: 0.0035 max mem: 22446 +eval (test): [20] [78/79] eta: 0:00:00 time: 0.3361 data: 0.0032 max mem: 22446 +eval (test): [20] Total time: 0:00:31 (0.3993 s / it) +evaluating best checkpoint: experiments/data_scaling/output/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +eval model info: +{"score": 0.9635416666666666, "hparam": [22, 1.0], "hparam_id": 43, "epoch": 17, "is_best": true, "best_score": 0.9635416666666666} +eval (train): [20] [ 0/297] eta: 0:15:41 time: 3.1711 data: 2.8793 max mem: 22446 +eval (train): [20] [ 20/297] eta: 0:02:26 time: 0.3969 data: 0.0150 max mem: 22446 +eval (train): [20] [ 40/297] eta: 0:01:54 time: 0.3588 data: 0.0033 max mem: 22446 +eval (train): [20] [ 60/297] eta: 0:01:39 time: 0.3604 data: 0.0035 max mem: 22446 +eval (train): [20] [ 80/297] eta: 0:01:26 time: 0.3354 data: 0.0032 max mem: 22446 +eval (train): [20] [100/297] eta: 0:01:16 time: 0.3620 data: 0.0035 max mem: 22446 +eval (train): [20] [120/297] eta: 0:01:08 time: 0.3808 data: 0.0036 max mem: 22446 +eval (train): [20] [140/297] eta: 0:01:00 time: 0.3824 data: 0.0036 max mem: 22446 +eval (train): [20] [160/297] eta: 0:00:52 time: 0.3489 data: 0.0035 max mem: 22446 +eval (train): [20] [180/297] eta: 0:00:44 time: 0.3556 data: 0.0032 max mem: 22446 +eval (train): [20] [200/297] eta: 0:00:36 time: 0.3446 data: 0.0030 max mem: 22446 +eval (train): [20] [220/297] eta: 0:00:29 time: 0.3804 data: 0.0037 max mem: 22446 +eval (train): [20] [240/297] eta: 0:00:21 time: 0.3799 data: 0.0037 max mem: 22446 +eval (train): [20] [260/297] eta: 0:00:13 time: 0.3679 data: 0.0035 max mem: 22446 +eval (train): [20] [280/297] eta: 0:00:06 time: 0.3738 data: 0.0036 max mem: 22446 +eval (train): [20] [296/297] eta: 0:00:00 time: 0.3193 data: 0.0033 max mem: 22446 +eval (train): [20] Total time: 0:01:51 (0.3749 s / it) +eval (validation): [20] [ 0/63] eta: 0:03:58 time: 3.7810 data: 3.5348 max mem: 22446 +eval (validation): [20] [20/63] eta: 0:00:21 time: 0.3446 data: 0.0028 max mem: 22446 +eval (validation): [20] [40/63] eta: 0:00:10 time: 0.3726 data: 0.0034 max mem: 22446 +eval (validation): [20] [60/63] eta: 0:00:01 time: 0.3339 data: 0.0035 max mem: 22446 +eval (validation): [20] [62/63] eta: 0:00:00 time: 0.3323 data: 0.0035 max mem: 22446 +eval (validation): [20] Total time: 0:00:25 (0.4084 s / it) +eval (test): [20] [ 0/79] eta: 0:03:57 time: 3.0031 data: 2.7628 max mem: 22446 +eval (test): [20] [20/79] eta: 0:00:28 time: 0.3570 data: 0.0035 max mem: 22446 +eval (test): [20] [40/79] eta: 0:00:16 time: 0.3655 data: 0.0035 max mem: 22446 +eval (test): [20] [60/79] eta: 0:00:07 time: 0.3856 data: 0.0034 max mem: 22446 +eval (test): [20] [78/79] eta: 0:00:00 time: 0.3324 data: 0.0031 max mem: 22446 +eval (test): [20] Total time: 0:00:31 (0.3978 s / it) +eval results: + +| model | repr | clf | dataset | ckpt | epoch | lr | wd | hparam_id | hparam | split | loss | acc | acc_std | f1 | f1_std | +|:---------|:-------|:------|:-------------|:-------|--------:|-------:|-----:|------------:|:----------|:-----------|-----------:|--------:|----------:|--------:|----------:| +| flat_mae | patch | attn | hcpya_task21 | best | 17 | 0.0066 | 0.05 | 43 | [22, 1.0] | train | 0.00010309 | 1 | 0 | 1 | 0 | +| flat_mae | patch | attn | hcpya_task21 | best | 17 | 0.0066 | 0.05 | 43 | [22, 1.0] | validation | 0.2797 | 0.96354 | 0.002988 | 0.9565 | 0.0038572 | +| flat_mae | patch | attn | hcpya_task21 | best | 17 | 0.0066 | 0.05 | 43 | [22, 1.0] | test | 0.30021 | 0.96329 | 0.0025358 | 0.95455 | 0.0034499 | + + +done! total time: 1:19:51 diff --git a/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/train_log.json b/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/train_log.json new file mode 100644 index 0000000000000000000000000000000000000000..9ccc45f6de780fe18e6a43622f0617e78921e028 --- /dev/null +++ b/data_scaling/n100_2/eval_v2/hcpya_task21__patch__attn/train_log.json @@ -0,0 +1,20 @@ +{"epoch": 0, "train/lr": 2.987987987987988e-05, "train/loss": 2.342913869023323, "train/grad": 0.2280800932645798, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.034132080078125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.031676025390625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.027474365234375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.023292236328125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.019185791015625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.013499755859375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.007122802734375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.000057373046875, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.99086181640625, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.9810205078125, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.971497802734375, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.957354736328125, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.9436083984375, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.9240655517578125, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.905673828125, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.888208312988281, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.8661868286132814, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.8406292724609377, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.81197265625, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.7851686096191406, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.7519265747070314, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.7169965744018554, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.6773509216308593, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.6337173843383788, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.587096586227417, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.5298401355743407, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.4774909138679506, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.429008994102478, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.3630234384536744, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.285081194639206, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.216509312391281, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.1553082263469694, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.0748480159044265, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.005260097682476, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.9255365484952927, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.8497318091988564, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.772245247066021, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.701781759262085, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.6280403831601142, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.543269058316946, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.4829560205340386, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.4327752429246903, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.370015895664692, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.3199568068981171, "train/loss_044_lr2.6e+01_wd1.0e+00": 1.267655809521675, "train/loss_045_lr3.1e+01_wd1.0e+00": 1.2141804111003875, "train/loss_046_lr3.6e+01_wd1.0e+00": 1.1688148445636033, "train/loss_047_lr4.3e+01_wd1.0e+00": 1.1199804694205522, "train/loss_048_lr5.0e+01_wd1.0e+00": 1.0796944474428891, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.0404021395649761, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.04032076418399811, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.040185068203136325, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.040049238624051214, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.03991315949708223, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.03972328587435186, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.03950985790230334, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.039269739473238585, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.03895072708837688, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.038606838788837194, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.03826228604651988, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.0377449263073504, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.03724022299051285, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.036514395819976925, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.035836449218913914, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03521287204697728, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.034473098143935205, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.033683327743783596, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03290245377458632, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.032260108757764104, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03155584519729018, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.030912513835355638, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.030273481914773585, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.029662270583212375, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.029086204739287497, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.028465595804154874, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.027964039975777268, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.027544543063268066, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.027031297287903724, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.026510576717555523, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.026132888509891927, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.02586952194571495, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.025654538851231336, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.025626543844118714, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.025758915506303312, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.025959644839167594, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.026289836643263696, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.026633104318752886, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.02700953369960189, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.02739804968237877, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.027786535331979393, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.027980264201760294, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.028115034215152265, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.028241218151524663, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.028479717150330545, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.028522769724950194, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.028638674570247533, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.029007260855287313, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.02914476618170738, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.9999492168426514, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.992506265640259, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.9803426265716553, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.9683544635772705, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.9565467834472656, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.94046950340271, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.922475576400757, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.9026854038238525, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.877408504486084, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.8511321544647217, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.826087236404419, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.789602756500244, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.7556705474853516, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.709068536758423, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.666522741317749, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.6274704933166504, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.579488515853882, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.5242562294006348, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.463047504425049, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.405808687210083, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.334390640258789, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.259352445602417, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.174022912979126, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.0807833671569824, "validation/loss_024_lr1.0e+00_wd1.0e+00": 1.9821901321411133, "validation/loss_025_lr1.2e+00_wd1.0e+00": 1.8642902374267578, "validation/loss_026_lr1.4e+00_wd1.0e+00": 1.7601014375686646, "validation/loss_027_lr1.6e+00_wd1.0e+00": 1.667811632156372, "validation/loss_028_lr1.9e+00_wd1.0e+00": 1.5485786199569702, "validation/loss_029_lr2.3e+00_wd1.0e+00": 1.417918086051941, "validation/loss_030_lr2.7e+00_wd1.0e+00": 1.3108441829681396, "validation/loss_031_lr3.1e+00_wd1.0e+00": 1.2203761339187622, "validation/loss_032_lr3.7e+00_wd1.0e+00": 1.1081554889678955, "validation/loss_033_lr4.3e+00_wd1.0e+00": 1.0220365524291992, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.9341375231742859, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.8568691611289978, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.7863438129425049, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.7337074875831604, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.6832047700881958, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.6345597505569458, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.601654052734375, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.5830603837966919, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.5717849135398865, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.5457582473754883, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.5262541770935059, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.5168272852897644, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.5072992444038391, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.48106279969215393, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.43221744894981384, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.10987103174603174, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.11011904761904762, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.10987103174603174, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.12103174603174603, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.14657738095238096, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.19915674603174602, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.23462301587301587, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.24603174603174602, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.24280753968253968, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.2353670634920635, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.23214285714285715, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.2279265873015873, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.22594246031746032, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.23040674603174602, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.23660714285714285, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.24578373015873015, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2619047619047619, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.28273809523809523, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.3087797619047619, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.33209325396825395, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.3583829365079365, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.3759920634920635, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.3960813492063492, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.41195436507936506, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.4312996031746032, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.45634920634920634, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.4771825396825397, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.4982638888888889, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.5347222222222222, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.5696924603174603, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.5989583333333334, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.6277281746031746, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.6547619047619048, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.6750992063492064, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.7018849206349206, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.7333829365079365, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.7529761904761905, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.7730654761904762, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.7859623015873016, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.794890873015873, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.8013392857142857, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.8090277777777778, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.8139880952380952, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.8263888888888888, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.8273809523809523, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.8340773809523809, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.8373015873015873, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.8462301587301587, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.8645833333333334, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.028792300630258, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.02730015092574369, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.027503717568723458, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.031570120862936146, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.040207413556603636, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.04970864091851554, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.050108811316636075, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.04883456126643933, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.049370317501414206, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.05006309078876094, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.053752500456681875, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.05627943843328086, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.05805497453866129, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.06269966484012686, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.06607643132895266, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.06996028337777975, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.07634621322483073, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.08653064324895947, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.09756833020537359, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.11190892110362004, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.12919008219545022, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.1419250229409515, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.15605820566795203, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.16886872656807617, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.18857683559824842, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.22299838130907604, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.25095369524426125, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.28139225810131263, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.3341795071308489, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.3987903422908162, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.44551474100271526, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.4922588457499707, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.538572782832181, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.5713494063322926, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.6205764226176284, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.6692579719829609, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.6958820404769412, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.7227025401852742, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.7419526395520906, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.7573706063961312, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.7646299162515846, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.7736424084451996, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.777997220539441, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.7961618202130587, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.7956666242209355, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.7985513020590602, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.801015695382195, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.8219241595617314, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.8392285253367382, "id_best": 48, "lr_best": 0.015, "wd_best": 0.05, "train/loss_best": 1.0796944474428891, "validation/loss_best": 0.43221744894981384, "validation/acc_best": 0.8645833333333334, "validation/f1_best": 0.8392285253367382} +{"epoch": 1, "train/lr": 8.993993993993998e-05, "train/loss": 1.5343935042619705, "train/grad": 0.19384867660701274, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.939273681640625, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.9240869140625, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.8997723388671877, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.8762982177734373, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.853975830078125, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.8241632080078123, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.7921051025390624, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.7584878540039064, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.7165606689453123, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.6746279907226564, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.6354920959472654, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.5797095489501953, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.528067207336426, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.4563945388793944, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.390267448425293, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.3283304882049563, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.251530146598816, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.1630512356758116, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.06660359621048, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.9793495684862137, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.8752523648738861, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.772282935976982, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.6625614267587663, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.5507404482364655, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.4404271492362022, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.3174859753251076, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.2168713772296906, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.133703016936779, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.033287275135517, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.9312852500379085, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.8552701327204705, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.7968189819157123, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.7327110059559345, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.6840116396546364, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.6338647964596749, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.589039511680603, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.5478151573240757, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.5164394274353981, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.48597853250801565, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.44784937098622324, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.4186369613558054, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.3956840152293444, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.3747052453830838, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.3611558273434639, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.34684432704001666, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.34271471343934534, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.35040483189746735, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.33947217304259536, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.3938215524703264, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.036758076725527646, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.03622846982441843, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.035364976888522505, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.03453002884984016, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.03373080485500395, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.03267608062364161, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.03158328772522509, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.030498928781598808, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.0292873315513134, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02823063305579126, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.027383260084316133, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.026387775149196387, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02565500808879733, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02487597399391234, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.024335456294938922, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.02393801050260663, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.023533285362645984, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.023140775617212058, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.022760011581704022, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.022449593832716344, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.022132849786430597, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.021895431028679014, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.021767517756670712, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.02183189150877297, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.022144052674993874, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.022768321987241505, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.023451732844114302, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.024130771569907665, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.02513094206340611, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.02615030713379383, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.027019937317818404, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.027716472120955585, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.02853681121021509, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.02884147338569164, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.028846202613785864, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.028539785677567125, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.028192976359277963, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.027969069057144226, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.02767300440464169, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.026873716595582665, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.02593635981436819, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.025371420099399983, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.025200492609292267, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.025459252819418906, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.025347976703196762, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.025870318845845758, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.02758228045888245, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.027875727387145163, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.03270801797043532, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.8669676780700684, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.843384265899658, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.8063156604766846, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.771723508834839, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.7391722202301025, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.6972124576568604, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.653088331222534, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.60749888420105, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.551745653152466, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.4959065914154053, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.4437878131866455, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.3688244819641113, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.298783779144287, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.2011985778808594, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.1109421253204346, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.0272040367126465, "validation/loss_016_lr2.7e-01_wd1.0e+00": 1.9247040748596191, "validation/loss_017_lr3.2e-01_wd1.0e+00": 1.8103301525115967, "validation/loss_018_lr3.8e-01_wd1.0e+00": 1.6910181045532227, "validation/loss_019_lr4.4e-01_wd1.0e+00": 1.58811616897583, "validation/loss_020_lr5.2e-01_wd1.0e+00": 1.4711958169937134, "validation/loss_021_lr6.1e-01_wd1.0e+00": 1.3597615957260132, "validation/loss_022_lr7.2e-01_wd1.0e+00": 1.244575023651123, "validation/loss_023_lr8.5e-01_wd1.0e+00": 1.1320887804031372, "validation/loss_024_lr1.0e+00_wd1.0e+00": 1.0295250415802002, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.920577347278595, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.84104323387146, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.7829837203025818, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.7249413728713989, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.6614136695861816, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.6137033700942993, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.5795009732246399, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.5434919595718384, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.5198915004730225, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.5038899779319763, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.4934152364730835, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.4702400863170624, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.44032829999923706, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.40796953439712524, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.3851078152656555, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.387474000453949, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.38223350048065186, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.3636941611766815, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.4056830108165741, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.36016523838043213, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.36307471990585327, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.37265729904174805, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.5603668093681335, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.528840184211731, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.24206349206349206, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.23859126984126985, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.23834325396825398, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.2378472222222222, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.23883928571428573, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.24330357142857142, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.2517361111111111, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.2631448412698413, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.2760416666666667, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.2924107142857143, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.3115079365079365, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.3392857142857143, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.36607142857142855, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.39037698412698413, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.41369047619047616, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.4283234126984127, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.44642857142857145, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.4712301587301587, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.5022321428571429, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.5235615079365079, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.5625, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.5939980158730159, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.6232638888888888, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.6488095238095238, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.6753472222222222, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.7204861111111112, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.7440476190476191, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.7599206349206349, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.7678571428571429, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.7839781746031746, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.800843253968254, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.8132440476190477, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.8246527777777778, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.8325892857142857, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.8330853174603174, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.8363095238095238, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.8444940476190477, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.8576388888888888, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.8697916666666666, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.876984126984127, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.8764880952380952, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.878968253968254, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.886656746031746, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.8757440476190477, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.8901289682539683, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.8936011904761905, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.8985615079365079, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.8784722222222222, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.8968253968253969, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.04451100442375055, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.04675331749728835, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.05434361895179986, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.057431357639965666, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.060335214785900405, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.0643172236620885, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.06839253130995833, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.07251965005822504, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.07849562537525397, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.08655271067966881, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.10019395210014455, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.11992082146321444, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.1392191547146341, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.15628195159785682, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.17317795805803474, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.19108640059224694, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.21203854287764182, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.24522761847407634, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.28832829087591333, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.32186689218811904, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.38737710318881957, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.4370897948800656, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.4853165694252068, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.5246504019326752, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.5740592906238973, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.6534074616496018, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.689065545491939, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.7092900262137333, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.7187987076907417, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.7404626137066297, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.7615101340138418, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.7803029933641488, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.7976283865189422, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.8090255415955449, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.8112783081021507, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.8143347055853738, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.8229212941684813, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.8364151872487392, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.8477847573255246, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.8561065439991893, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.8565235667924814, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.8634516081162399, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.8745823060649623, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.8626435545018021, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.8733919409443202, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.8767581797977005, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.8765149895309211, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.8370975671031393, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.8774389594133643, "id_best": 46, "lr_best": 0.010799999999999999, "wd_best": 0.05, "train/loss_best": 0.35040483189746735, "validation/loss_best": 0.37265729904174805, "validation/acc_best": 0.8985615079365079, "validation/f1_best": 0.8765149895309211} +{"epoch": 2, "train/lr": 0.00014999999999999988, "train/loss": 1.2064301002025604, "train/grad": 0.2185975307971239, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.780958251953125, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.749723205566406, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.701399230957031, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.6572015380859373, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.6163507080078126, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.5631805419921876, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.5071045684814455, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.448528594970703, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.375818328857422, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.302644958496094, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.2340626525878906, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.1365270519256594, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.047318572998047, "train/loss_013_lr1.7e-01_wd1.0e+00": 1.9271140444278716, "train/loss_014_lr2.0e-01_wd1.0e+00": 1.8212598311901091, "train/loss_015_lr2.3e-01_wd1.0e+00": 1.727645097374916, "train/loss_016_lr2.7e-01_wd1.0e+00": 1.6182515305280685, "train/loss_017_lr3.2e-01_wd1.0e+00": 1.500776686668396, "train/loss_018_lr3.8e-01_wd1.0e+00": 1.381155098080635, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.2791910856962203, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.1647647866606712, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.059607271552086, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.9583692580461503, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.8674102176725864, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.788503930568695, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.7096916308999062, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.6511807356774807, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.6064185680449009, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.5559140564501286, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.5063611504435539, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.46976864464581014, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.441762979850173, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.41078264489769933, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.3870041736215353, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.3610641241818666, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.3347338838875294, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.3056453835964203, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.2786133982613683, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.2543172630853951, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.2416464607603848, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.24149415403604507, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.2376771586947143, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.2430177347920835, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.27345183040946724, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.30887046970427035, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.3641112047433853, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.5265279486682266, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.9233560318127274, "train/loss_048_lr5.0e+01_wd1.0e+00": 1.2667973901703953, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.0317841705866158, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.030691577428951858, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02913816476240754, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.027911233128979803, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02695582987740636, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.025974897546693684, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.025200833762064577, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.024605133421719074, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.024058552226051688, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.0236301377043128, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.023289100443944336, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.022854087147861718, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02248951341956854, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.022043063919991255, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.021700328718870877, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.021454640841111542, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.021273405365645884, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.0212575411144644, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.021536677246913313, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.022081569992005824, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.02306739524472505, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.024242809317074718, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.02551350344903767, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.026626854790374636, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.027350199818611146, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.027597252232953905, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.027488642912358044, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.027325198519974947, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0270914026722312, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.026726813800632954, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.026293397694826127, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.025881254989653827, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.025370110869407655, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.02499365814961493, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.024558072187937798, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.02393710957840085, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.02313741425052285, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.02236912733409554, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.021807926166802646, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.02262315565254539, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.023201168845407665, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.02296711205970496, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.024242649460211395, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.026216890928335488, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.02987919909879565, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.03414526409003884, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.043816413525491954, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.06737525793723761, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.09358429652638733, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.694931745529175, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.657360315322876, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.600322723388672, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.548366069793701, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.500048875808716, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.4368109703063965, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.369508981704712, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.298635721206665, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.2102434635162354, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.12129282951355, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.0387282371520996, "validation/loss_011_lr1.2e-01_wd1.0e+00": 1.9228652715682983, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.8194351196289062, "validation/loss_013_lr1.7e-01_wd1.0e+00": 1.6847034692764282, "validation/loss_014_lr2.0e-01_wd1.0e+00": 1.5704716444015503, "validation/loss_015_lr2.3e-01_wd1.0e+00": 1.4721674919128418, "validation/loss_016_lr2.7e-01_wd1.0e+00": 1.358994483947754, "validation/loss_017_lr3.2e-01_wd1.0e+00": 1.2383174896240234, "validation/loss_018_lr3.8e-01_wd1.0e+00": 1.1162874698638916, "validation/loss_019_lr4.4e-01_wd1.0e+00": 1.0173851251602173, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.9180310368537903, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.8355439901351929, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.7634652256965637, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.7020248174667358, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.6468377709388733, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.5931406617164612, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.5543473362922668, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.5270479321479797, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.4986356794834137, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.47102218866348267, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.45109766721725464, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.4361346662044525, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.41512009501457214, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.3953264653682709, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.36612221598625183, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.33286672830581665, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.313119500875473, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.29182329773902893, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.2725026309490204, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.30119413137435913, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.3140696585178375, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.30005887150764465, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.30778005719184875, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.42535755038261414, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.5942381024360657, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.7723048329353333, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.8935211300849915, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.7277878522872925, "validation/loss_048_lr5.0e+01_wd1.0e+00": 2.3240981101989746, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.24231150793650794, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.24801587301587302, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.26240079365079366, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.27628968253968256, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.29017857142857145, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.31572420634920634, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.33903769841269843, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.3611111111111111, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.3787202380952381, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.3990575396825397, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.41617063492063494, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.4494047619047619, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.4756944444444444, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.5121527777777778, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.5456349206349206, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.5694444444444444, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.5997023809523809, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.6336805555555556, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.6617063492063492, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.6899801587301587, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.7125496031746031, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.7400793650793651, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.7606646825396826, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.7790178571428571, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.7931547619047619, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.8070436507936508, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.8189484126984127, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.8271329365079365, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.8375496031746031, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.8479662698412699, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.8529265873015873, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.8556547619047619, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.8640873015873016, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.8680555555555556, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.8787202380952381, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.8955853174603174, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9007936507936508, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9097222222222222, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9161706349206349, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9129464285714286, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9097222222222222, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9166666666666666, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9216269841269841, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.8958333333333334, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.8821924603174603, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.8794642857142857, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.894593253968254, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.8876488095238095, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.8737599206349206, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.065662142312116, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.06856160956549236, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.07352426934937853, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.08020311286998685, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.08754262127581634, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.10230934697889062, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.11539628971041216, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.1314376380722318, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.1441998323173054, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.15852608655333927, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.17694269321115394, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.22142919909754336, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.2561018132186906, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.3028129188689087, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.3501436395210755, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.38658151327714046, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.4369238473561306, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.497985677334215, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.5464700830611638, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.593186298639258, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.6344433368014298, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.6774017510916255, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.7095214106478783, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.7353397081204834, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.7520491054524919, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.7731952642522332, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.7878762131533793, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.7969447371501853, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.8100991176608658, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.8235228692917695, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.829438858952019, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.8342257667111296, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.8424086482109902, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.8475440608403672, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.8604535192694511, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.8800898691356084, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.8842282842475249, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.8942120649781564, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9033767711490893, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.8913841032642227, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.8900348110288744, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.8917077896286133, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.8953451153523292, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.8627178588334671, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.8460493282266179, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.8446305177395608, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.8765177156159043, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.8592566519383054, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.8510721899583936, "id_best": 42, "lr_best": 0.005699999999999999, "wd_best": 0.05, "train/loss_best": 0.2430177347920835, "validation/loss_best": 0.30778005719184875, "validation/acc_best": 0.9216269841269841, "validation/f1_best": 0.8953451153523292} +{"epoch": 3, "train/lr": 0.0002100600600600602, "train/loss": 1.1337577852606773, "train/grad": 0.40039888076484204, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.6146881103515627, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.5710926818847657, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.503785934448242, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.4413320922851565, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.3827875518798827, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.3057373428344725, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.2234973526000976, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.1375413703918458, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.0319239902496338, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.9285154342651367, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.8349715673923492, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.7083774256706237, "train/loss_012_lr1.4e-01_wd1.0e+00": 1.5991920351982116, "train/loss_013_lr1.7e-01_wd1.0e+00": 1.4603483146429062, "train/loss_014_lr2.0e-01_wd1.0e+00": 1.3433129918575286, "train/loss_015_lr2.3e-01_wd1.0e+00": 1.242283932864666, "train/loss_016_lr2.7e-01_wd1.0e+00": 1.1277397412061692, "train/loss_017_lr3.2e-01_wd1.0e+00": 1.0115070468187333, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.9039734381437302, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.8228094285726547, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.7421829350292682, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.6756236360967159, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.6160068824887276, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.5647099291533232, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.5196574087440967, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.4751384097337723, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.44198430955410006, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.4160782707110047, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.3858809008449316, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.35376399949193, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.3266708920150995, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.3044035210832953, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.27718716111034153, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.25241935277357697, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.22709322668612003, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.20984414648264646, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.1980610035918653, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.18375779854133725, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.1783032463863492, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.19733560306951403, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.20444318750873208, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.22912964408285916, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.27892006499692795, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.37515547063201665, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.6676012862473726, "train/loss_045_lr3.1e+01_wd1.0e+00": 1.1837515822798013, "train/loss_046_lr3.6e+01_wd1.0e+00": 1.601158661544323, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.147039904780686, "train/loss_048_lr5.0e+01_wd1.0e+00": 4.125412736535072, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.026276620831340552, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.025520089874044062, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.024685510639101266, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02414935384877026, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.023765643583610652, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.023352640252560376, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02296912983059883, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.022594141662120818, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.022158250398933887, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02175381329841912, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02141798441298306, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.021035680007189514, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.020808555358089505, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02072982274927199, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.020948216803371906, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.02142742543015629, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.022347486596554517, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.02363633707165718, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.024975631004199386, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.025933107556775213, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.026634484743699432, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.02682910584844649, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.026732397023588417, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.026451424593105913, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.02601951712742448, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.025698917591944336, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.025565406922250986, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.02545180734246969, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.025214044810272753, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.024600486438721418, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.023819594709202647, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.02321242026053369, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.02254797931294888, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.021772520397789777, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.021038469588384033, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.020601983782835304, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.020494227893650532, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.02007483821362257, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.020025023226626217, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.022733567180112005, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.023382422081194818, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.025596668985672297, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.029221761762164532, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.036730777323246006, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.05720308336429298, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.08492809846997261, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.10539829543791711, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.19732175556942821, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.24998091131448746, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.5232038497924805, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.473085880279541, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.395318031311035, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.323221445083618, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.2554354667663574, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.166609764099121, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.072739601135254, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.9758074283599854, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.8594579696655273, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.7485164403915405, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.651002049446106, "validation/loss_011_lr1.2e-01_wd1.0e+00": 1.5222764015197754, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.4130915403366089, "validation/loss_013_lr1.7e-01_wd1.0e+00": 1.2750117778778076, "validation/loss_014_lr2.0e-01_wd1.0e+00": 1.1589887142181396, "validation/loss_015_lr2.3e-01_wd1.0e+00": 1.061747670173645, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.9583722949028015, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.8625584840774536, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.7820985317230225, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.722798228263855, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.6633304357528687, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.6150923371315002, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.5736969113349915, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.5384609699249268, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.5091574788093567, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.4784731864929199, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.45571333169937134, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.4377102851867676, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.41675490140914917, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.3961678445339203, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.38292306661605835, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.36452940106391907, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.3352230489253998, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.3211575448513031, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.3004959523677826, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.28737297654151917, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.28930169343948364, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.2744881808757782, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.23827014863491058, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.32983872294425964, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.2748293876647949, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.5894421935081482, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.7826354503631592, "validation/loss_043_lr2.2e+01_wd1.0e+00": 1.0815876722335815, "validation/loss_044_lr2.6e+01_wd1.0e+00": 1.3649760484695435, "validation/loss_045_lr3.1e+01_wd1.0e+00": 1.6541874408721924, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.2198286056518555, "validation/loss_047_lr4.3e+01_wd1.0e+00": 8.320958137512207, "validation/loss_048_lr5.0e+01_wd1.0e+00": 6.756789684295654, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.28348214285714285, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.30084325396825395, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.330109126984127, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.35788690476190477, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.37177579365079366, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.3948412698412698, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.41294642857142855, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.4375, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.46205357142857145, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.48834325396825395, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.513640873015873, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.5570436507936508, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.5848214285714286, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.621031746031746, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.6537698412698413, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.6736111111111112, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.7100694444444444, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.7378472222222222, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.7571924603174603, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.7800099206349206, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.794890873015873, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.8110119047619048, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.8201884920634921, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.8311011904761905, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.8380456349206349, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.8459821428571429, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.8536706349206349, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.8576388888888888, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.8655753968253969, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.8712797619047619, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.8764880952380952, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.8804563492063492, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.8931051587301587, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.8943452380952381, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9015376984126984, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9067460317460317, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9064980158730159, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9181547619047619, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9268353174603174, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9109623015873016, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9265873015873016, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.8772321428571429, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.8735119047619048, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.8536706349206349, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.8802083333333334, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.8814484126984127, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.8655753968253969, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.8464781746031746, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.8700396825396826, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.08058165245828373, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.09024073161587974, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.10993828948598058, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.1301638028128572, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.1408804831158463, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.15723433637489578, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.17024663428268103, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.20333736166892138, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.23374045529023593, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.2678841828383499, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.3031369730135888, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.37458104773788514, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.42201614773277735, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.481107586021444, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.5295131038441453, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.5700961095158694, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.6361747367213036, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.6812593711509741, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.7058598867501468, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.7326558638333325, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.752743443251387, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.7748590508793556, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.788099644246481, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.8024406977967431, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.8108109646416227, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.8210681350201109, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.8310961615776696, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.8344969082016082, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.8443865747541288, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.848547239931819, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.850648336298872, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.8559348835012902, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.8708252443471084, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.8726359438691205, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.8832471737536555, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.8927846217929705, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.8878865962424136, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.8997743720801553, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9142834512938813, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.8883976835528145, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9102168142656492, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.8579017010619351, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.8476218950784797, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.8043071445125337, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.8434160027272357, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.8650738490674806, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.8231094265786765, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.823283364748737, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.8435410490670104, "id_best": 38, "lr_best": 0.00294, "wd_best": 0.05, "train/loss_best": 0.1783032463863492, "validation/loss_best": 0.23827014863491058, "validation/acc_best": 0.9268353174603174, "validation/f1_best": 0.9142834512938813} +{"epoch": 4, "train/lr": 0.0002701201201201201, "train/loss": 1.4080271163582803, "train/grad": 0.8634614960849285, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.4384442138671876, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.3806572723388673, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.290873336791992, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.2075865173339846, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.1300033378601073, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.0296889114379884, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.9260095834732056, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.8217858982086181, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.7004105335474013, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.5877224254608153, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.4902018958330154, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.3616743487119676, "train/loss_012_lr1.4e-01_wd1.0e+00": 1.251913591325283, "train/loss_013_lr1.7e-01_wd1.0e+00": 1.1140761616826058, "train/loss_014_lr2.0e-01_wd1.0e+00": 1.0038854065537453, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.9165424370765686, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.8270908957719803, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.7443115465342999, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.671536075770855, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.61708072245121, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.5621024920046329, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.5156104171276092, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.47305098943412305, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.4344446042925119, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.3990024506300688, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.36226200133562086, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.3341444825381041, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.31177407927811146, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.2846204628050327, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.25260100334882735, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.2281608358770609, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.2109229987487197, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.1947971298545599, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.17702029768377542, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.163178520295769, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.14980248868465423, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.14023219632916153, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.14276542100124062, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.1738287391513586, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.2359217062406242, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.2669601707253605, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.36978599640540777, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.7668486101087183, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.8039984002895654, "train/loss_044_lr2.6e+01_wd1.0e+00": 1.3373601854592563, "train/loss_045_lr3.1e+01_wd1.0e+00": 1.5524646623898297, "train/loss_046_lr3.6e+01_wd1.0e+00": 4.269089908003807, "train/loss_047_lr4.3e+01_wd1.0e+00": 11.203385640382766, "train/loss_048_lr5.0e+01_wd1.0e+00": 12.165697870254517, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02411108328960836, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02373536949045956, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.023265233412384987, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.022891723802313208, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.022569111911579967, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02217575430870056, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02179550047032535, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02144861393608153, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.021116733243688943, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.020918147303164006, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.020872061969712377, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02106831652112305, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.021554285273887218, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.022708816742524503, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.023999175941571593, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.025043240459635855, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.025875437557697296, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.026210566041991113, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.026133768409490585, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.02594191286712885, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0256513390224427, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.02540523501113057, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.02517328826710582, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.024844914926216006, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.024251187881454826, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.023471203120425343, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.022916583213955163, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.022461906326934695, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.02185129817109555, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.021028377246111632, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.02056924961041659, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.02034502349793911, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0201812409539707, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.01939434590982273, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.019009755998849867, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.018657977047841997, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.018517861105501653, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.019641295671463013, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.022293462578672915, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.027323083449155093, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.029567244423087686, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0372273318329826, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.06235735623165965, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.06384992799721659, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.09657949730753898, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.11232980839908123, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.251303217895329, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.5385260628163815, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.43847382828593257, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.347627639770508, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.2826285362243652, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.181796073913574, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.0888068675994873, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.003030300140381, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.8937726020812988, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.7827340364456177, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.6734719276428223, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.54861581325531, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.4342458248138428, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.3355218172073364, "validation/loss_011_lr1.2e-01_wd1.0e+00": 1.2051563262939453, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.0954639911651611, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.9655823707580566, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.8697194457054138, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.797907829284668, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.7272695899009705, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.6646444201469421, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.6122511625289917, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.5739409923553467, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.5359259843826294, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.5043385624885559, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.47573572397232056, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.45078611373901367, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.4296228587627411, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.4049764573574066, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.3855542242527008, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.37168648838996887, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.35557854175567627, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.3287097215652466, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.32474419474601746, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.3217407464981079, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.3113187849521637, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.2948131859302521, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.2879641056060791, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.27370285987854004, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.3363344669342041, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.2984030544757843, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.41170749068260193, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.5762305855751038, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.598948061466217, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.7470265626907349, "validation/loss_042_lr1.9e+01_wd1.0e+00": 1.3650866746902466, "validation/loss_043_lr2.2e+01_wd1.0e+00": 1.1665780544281006, "validation/loss_044_lr2.6e+01_wd1.0e+00": 2.2044334411621094, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.9505162239074707, "validation/loss_046_lr3.6e+01_wd1.0e+00": 5.254217624664307, "validation/loss_047_lr4.3e+01_wd1.0e+00": 15.28943920135498, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.0445220470428467, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.34672619047619047, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.3663194444444444, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.3888888888888889, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.408234126984127, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.4273313492063492, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.4548611111111111, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.48313492063492064, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.5086805555555556, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.5458829365079365, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.5793650793650794, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.6049107142857143, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.6374007936507936, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.669890873015873, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.7110615079365079, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.7420634920634921, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.7611607142857143, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.7795138888888888, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.7963789682539683, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.8125, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.8209325396825397, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.8313492063492064, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.8407738095238095, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.8511904761904762, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.8586309523809523, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.8625992063492064, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.8717757936507936, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.8774801587301587, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.8809523809523809, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.888640873015873, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.8933531746031746, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.8983134920634921, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9010416666666666, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9079861111111112, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9136904761904762, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9176587301587301, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9206349206349206, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9126984126984127, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9248511904761905, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.902281746031746, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.8903769841269841, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9025297619047619, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9035218253968254, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.8799603174603174, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.886656746031746, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.8903769841269841, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.8807043650793651, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.8921130952380952, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.8628472222222222, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.04365079365079365, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.12011203895085834, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.13581940575208845, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.15246150365779462, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.1674176237932443, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.18882128481259244, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.22396894454422403, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.2612342056083146, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.2919324363318062, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.34419471078245295, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.3984089346140095, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.44602300545782725, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.5061135974072298, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.5589870166532991, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.634005828131982, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.6858259601325091, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.7113307206143626, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.7359532922652035, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.7580609087249793, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.7791095033558455, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.7912600004350943, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.8054373114324109, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.8158425510921202, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.8301697839740586, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.8392114094688823, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.8438344654426964, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.855934964624321, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.8616826680642204, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.8650924261765324, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.8736376268960347, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.8792828965454146, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.8859470925075049, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.8893835183404035, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.8977576053940106, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9033797036146342, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.906149604493096, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9097558414961634, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9032895106997516, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9094071425680517, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.8908023849248929, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.8583818322048453, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.8949813714570529, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.8842587651054831, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.8574503846607416, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.8619805494694998, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.8809822620469745, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.8592872526221992, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.8711531262290445, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.8385153814592013, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.003983342386384211, "id_best": 37, "lr_best": 0.00249, "wd_best": 0.05, "train/loss_best": 0.14276542100124062, "validation/loss_best": 0.2984030544757843, "validation/acc_best": 0.9248511904761905, "validation/f1_best": 0.9094071425680517} +{"epoch": 5, "train/lr": 0.00029891323817685334, "train/loss": 1.2262370672821998, "train/grad": 0.7306421679258347, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.265858612060547, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.192963466644287, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.08061185836792, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.978896803855896, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.886675899028778, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.771861159801483, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.658278591632843, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.5486602544784547, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.4245138853788375, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.3100333997607232, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.210128377377987, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.0793403196334839, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.9738343393802643, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.854993403851986, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.7697009809315205, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.7057384943962097, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.6411757245659828, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.5817445708811283, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.529169708788395, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.4891639243066311, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.44778170228004455, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.41157641254365446, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.3771180886030197, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.3452103142440319, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.3160153544321656, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.28591395225375893, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.26266288295388224, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.24179276507347822, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.21413984075188636, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.1853889673575759, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.16871901838108896, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.15756413027644156, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.14169980343431235, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.12738075968809426, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.11597825498320162, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.1188252193108201, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.1339315895922482, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.16176750579848886, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.17405592801980674, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.29067453750409183, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.5338082181289792, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.7544183893036097, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.7831750468444079, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.9616526595316828, "train/loss_044_lr2.6e+01_wd1.0e+00": 1.6242508478462696, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.4504893460869788, "train/loss_046_lr3.6e+01_wd1.0e+00": 6.242175876200199, "train/loss_047_lr4.3e+01_wd1.0e+00": 13.057231267690659, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.046875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.023058986030519008, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.022747112382203342, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02229636872187257, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.021915317457169296, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.021592289116233586, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02123444614931941, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.020951221259310843, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.020790294823236764, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.020801923125982284, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.021085406485944987, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02161642353516072, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.022776229651644827, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.023969433335587383, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.025186140444129704, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02567027441225946, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.02577027284540236, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.02565687940455973, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.025401687659323217, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.02504338510800153, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.024648436470888557, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.024106538342311978, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.02359124688897282, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.023094079401344062, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.022599397827871143, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.02219353988301009, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.021918106731027363, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.021749043324962258, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.02149770812597126, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.02089565637521446, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.020332160592079162, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.019916470660828054, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.019520881434436886, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.018606369288172574, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.018101510193664582, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.017319042265880852, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.017639032762963323, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.01959199266973883, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.022730939944740385, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.023418467132141813, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.03355683558154851, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.048575836941599844, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.06267030193237588, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.06764769928529858, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.07568839826621115, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.11789439043030142, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.1631174775958061, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.3526387795060873, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.5531861816346645, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.1822972297668457, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.103849411010742, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.9841563701629639, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.8771291971206665, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.7813525199890137, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.6638447046279907, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.548924446105957, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.4383999109268188, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.3129674196243286, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.1968261003494263, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.0966849327087402, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.9728957414627075, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.8804184198379517, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.7814469337463379, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.7129047513008118, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.662174642086029, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.6119220852851868, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.5668277144432068, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.5278291702270508, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.4989200532436371, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.47010472416877747, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.4461449980735779, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.4248490035533905, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.4056444466114044, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.3865866959095001, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.366696298122406, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.3508545160293579, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.33152222633361816, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.31336015462875366, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.28987786173820496, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.28317949175834656, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.2761380076408386, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.27230414748191833, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.28986674547195435, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.3077014088630676, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.29829278588294983, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.33092838525772095, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.32214513421058655, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.4522911608219147, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.5346826910972595, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.6862676739692688, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.6745328307151794, "validation/loss_042_lr1.9e+01_wd1.0e+00": 1.0309245586395264, "validation/loss_043_lr2.2e+01_wd1.0e+00": 1.3332045078277588, "validation/loss_044_lr2.6e+01_wd1.0e+00": 1.9490994215011597, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.3866775035858154, "validation/loss_046_lr3.6e+01_wd1.0e+00": 12.496621131896973, "validation/loss_047_lr4.3e+01_wd1.0e+00": 11.031810760498047, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.0445220470428467, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.3861607142857143, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.4025297619047619, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.4305555555555556, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.45535714285714285, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.4799107142857143, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.5089285714285714, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.5391865079365079, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.5731646825396826, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.605406746031746, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.6393849206349206, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.6661706349206349, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.7058531746031746, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.734375, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.7641369047619048, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.7849702380952381, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.8015873015873016, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.8139880952380952, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.8263888888888888, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.8402777777777778, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.8442460317460317, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.8519345238095238, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.8578869047619048, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.8628472222222222, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.8700396825396826, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.8717757936507936, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.8792162698412699, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.8878968253968254, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.8923611111111112, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.8985615079365079, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.908234126984127, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9099702380952381, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9139384920634921, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9186507936507936, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9126984126984127, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9164186507936508, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9231150793650794, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9181547619047619, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9278273809523809, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9087301587301587, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9206349206349206, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9181547619047619, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9265873015873016, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9149305555555556, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9040178571428571, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9055059523809523, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.904265873015873, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.8630952380952381, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.8940972222222222, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.04365079365079365, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.15044423312127322, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.16318863647934603, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.1958500110875271, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.22572426703075363, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.2585651699121944, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.2963397476051764, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.33752924868634937, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.3951413623310303, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.4547658274463401, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.5121240472649425, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.5553201344142003, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.6220125648635231, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.6712931640530376, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.7138384841898291, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.7402634621319073, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.7617953203582657, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.780017190387493, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.7977038827999581, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.8133271416243996, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.8203779727174197, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.8298654832485445, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.8383904135266919, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.8440655253448174, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.8533038773425026, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.8545400523102351, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.8623472182696863, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.8718533449498099, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.8786903426442155, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.8842496835950516, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.8945027956444088, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.8985325924289873, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.8996430937835931, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9006088599719613, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.897072157705289, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9010669796751637, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9133502655700833, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9084623733895141, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9071874220482211, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.8994725000719589, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9063745174819646, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9059942568590872, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9109083102292027, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.8994669591632071, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.8913524570320677, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.8893297767977473, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.8824116863282603, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.8412894497631659, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.8720309734192497, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.003983342386384211, "id_best": 37, "lr_best": 0.00249, "wd_best": 0.05, "train/loss_best": 0.16176750579848886, "validation/loss_best": 0.32214513421058655, "validation/acc_best": 0.9278273809523809, "validation/f1_best": 0.9071874220482211} +{"epoch": 6, "train/lr": 0.0002924170901991409, "train/loss": 0.9292806932330131, "train/grad": 0.5865190514922142, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.1051619052886963, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.0207003259658816, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.8939378404617309, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.7827992963790893, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.6851364237070083, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.5670210218429566, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.452748913168907, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.3428437262773514, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.2173029243946076, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.1015646931529046, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.0045897862315178, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.8886972254514695, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.8031425499916076, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.7109510520100594, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.6445334322750569, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.5938731618225574, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.5417934893071652, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.4923683528602123, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.44726378820836543, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.411845380961895, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.3745369904488325, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.3416022054851055, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.30946424543857576, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.27870899364352225, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.24950206238776446, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.21688147000968455, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.18772216513752937, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.1649723950959742, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.1401292616315186, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.1123880261927843, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.09763045366853476, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.08979988570325076, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.07597521270625293, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.07064916361123323, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.07516446548514068, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.07128981925547123, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.07943363522179425, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.1134574284683913, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.17226486147381365, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.21076291345059872, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.2697027333173901, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.34577085201628505, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.4374323728773743, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.6589993091672659, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.8496681527327746, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.7126199066638947, "train/loss_046_lr3.6e+01_wd1.0e+00": 4.752677517309785, "train/loss_047_lr4.3e+01_wd1.0e+00": 6.320398276112973, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.046875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.022358182789757847, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.022013302082195877, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.021524377157911657, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.021142002535052596, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.0208626447385177, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02062699418514967, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.020562070505693555, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.020718379206955434, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02125725413672626, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.022185155292972922, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02325097323395312, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02459450057707727, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02535539506934583, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.025769191524013878, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.025772060565650463, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.025609320681542157, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.02532891015522182, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.02494517109822482, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.024473717068322003, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.023992988308891655, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.023455790700390933, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.022948023029603064, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.02232141267042607, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.021660661813803016, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.02108506280463189, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.020291356057859956, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.01917623495683074, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.01823195698671043, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.01711947483709082, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.015575847714208066, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.014984637352172285, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.014767912039533258, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.013636423577554523, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.01343114081886597, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.015088687813840807, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.01465233758033719, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.01628519908990711, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.019007534082047643, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.026258809877326712, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.029699644448119216, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.03506407445995137, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04380674793384969, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0479917918657884, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.06347851767204701, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.08363802481442689, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.19037463260814547, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.3109983410313725, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.3945342765003443, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.0408430099487305, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.9534684419631958, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.823401689529419, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.710566759109497, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.61212956905365, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.4938688278198242, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.3792086839675903, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.268237590789795, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.141625165939331, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.0284229516983032, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.9388312101364136, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.8372722268104553, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.7640079259872437, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.6871883869171143, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.633307933807373, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.593338131904602, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.5535142421722412, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.5154687166213989, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.4803657531738281, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.4544568955898285, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.42973774671554565, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.41136759519577026, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.39642027020454407, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.38285577297210693, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.3667600154876709, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.3423216640949249, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.3285180330276489, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.3181571066379547, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.2929002642631531, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.2684842646121979, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.26795727014541626, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.273688405752182, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.26937729120254517, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.2678644061088562, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.27994635701179504, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.2953091263771057, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.3372478485107422, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.34397414326667786, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.5195621848106384, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.45559728145599365, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.646396815776825, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.8193685412406921, "validation/loss_042_lr1.9e+01_wd1.0e+00": 1.2162762880325317, "validation/loss_043_lr2.2e+01_wd1.0e+00": 1.0292813777923584, "validation/loss_044_lr2.6e+01_wd1.0e+00": 1.101215124130249, "validation/loss_045_lr3.1e+01_wd1.0e+00": 4.210947036743164, "validation/loss_046_lr3.6e+01_wd1.0e+00": 5.974052429199219, "validation/loss_047_lr4.3e+01_wd1.0e+00": 6.271611213684082, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.0445220470428467, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.42137896825396826, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.4402281746031746, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.4697420634920635, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.4962797619047619, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.5238095238095238, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.564484126984127, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.5987103174603174, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.6314484126984127, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.6639384920634921, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.7021329365079365, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.7251984126984127, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.7547123015873016, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.7715773809523809, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.7899305555555556, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.8038194444444444, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.8147321428571429, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.8291170634920635, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.8370535714285714, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.8489583333333334, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.8581349206349206, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.8650793650793651, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.8702876984126984, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.873015873015873, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.875, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.8807043650793651, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.8901289682539683, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.8965773809523809, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9010416666666666, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9129464285714286, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9223710317460317, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9213789682539683, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9208829365079365, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9243551587301587, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9263392857142857, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9290674603174603, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9298115079365079, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9211309523809523, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9315476190476191, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.917906746031746, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9372519841269841, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9283234126984127, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9213789682539683, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9060019841269841, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.921875, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.933531746031746, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9117063492063492, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9087301587301587, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9248511904761905, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.04365079365079365, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.1820735635289371, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.20764125599115246, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.24295138345472764, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.278517657416019, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.3178586839090173, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.37574336928740315, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.43748050905555563, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.4949362587440514, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.5540277353064094, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.6231169682944511, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.6608772599712229, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.7038737245529063, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.726794641539731, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.7505877453435287, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.7683067489637049, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.7841301421602809, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.8030863034790944, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.812185800035086, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.8251643246975597, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.8367468999887601, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.8444797845480277, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.850433673446372, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.8548590172206345, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.8566063625287945, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.8637842685202275, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.8731857003351584, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.8810884559538513, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.8858960485973603, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.8994835601478838, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9075506579145134, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9057048739231089, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.905062536742442, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9099995521862229, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9123673501355625, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9129783556284736, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9173700152168438, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.914171056897348, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9174007220617503, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.8939099920427257, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.926268893812748, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9130236047429735, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.904937369863567, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.888717242132506, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9065083309105714, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9229525018792579, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.8871330726080708, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9018621352326952, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9110985298936479, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.003983342386384211, "id_best": 39, "lr_best": 0.0036, "wd_best": 0.05, "train/loss_best": 0.21076291345059872, "validation/loss_best": 0.45559728145599365, "validation/acc_best": 0.9372519841269841, "validation/f1_best": 0.926268893812748} +{"epoch": 7, "train/lr": 0.00027969249556793234, "train/loss": 0.7025855642557144, "train/grad": 0.402895141094923, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.9691181039810182, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.8781004118919373, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.7446993255615235, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.6308665317296982, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.5327291887998582, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.4152293229103088, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.3009582725167275, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.1896962949633598, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.063360185623169, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.9525750228762626, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.8658601289987564, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.766716226041317, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.6941605247557163, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.6151581285893917, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.5575837449729443, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.513225925937295, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.4671355725824833, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.4231771057844162, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.38247903376817705, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.35021374955773354, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.3155966678261757, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.2847148306667805, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.25494771618396045, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.2272422333806753, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.2004833549261093, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.16770952945575118, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.1429542924184352, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.12252023579552769, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.09907917395234107, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.072529540322721, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.057510903524234894, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.04930098915472627, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.04260910928249359, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.045536330845206974, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.043889762740582226, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.06458170342259109, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.0552824270259589, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.08266989891417324, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.15866887192241846, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.13455069383606313, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.20154855442233383, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.27381747259758416, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.27286989512853327, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.4067170901875943, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.49499166375026105, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.138173872223124, "train/loss_046_lr3.6e+01_wd1.0e+00": 1.9070743928104639, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.7492043616157025, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.046875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.022069859616458416, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.021717311013489962, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02125319981947541, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02093978603836149, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.020769353345967828, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.020741132106631994, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.020970279155299067, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.021522755939513444, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.022617958183400334, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.023874374162405728, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.024795470125973225, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.025462759416550396, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02555067578330636, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02528101292438805, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02487971745431423, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.024496928779408334, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.02405230631120503, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.02361038086935878, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.02317929523065686, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.022774387458339332, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.02223881911020726, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.02168752746190876, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.021137811699882148, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.020637351279146968, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.02013921329751611, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.01894185649231076, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.017695585265755653, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.016545504401437938, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.015087942713871599, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.013126068159472197, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.011854936982272192, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.010875469977036119, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.010092588358093053, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.011140135125606321, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.011182801295071841, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.013963462003739551, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.013440284897806123, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.017683323064120487, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.025838702871697025, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.02317103245295584, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.030409121527336536, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0364760164055042, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.037974165265331976, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.04701087792229373, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.060883185932179916, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.18000474790111184, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.18899634636938573, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.23949109607143326, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.921613335609436, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.8290276527404785, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.6942923069000244, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.5801247358322144, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.4818438291549683, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.3637624979019165, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.2481755018234253, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.136337399482727, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.0126835107803345, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.9097939729690552, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.8324778079986572, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.7467908263206482, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.6860763430595398, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.6214381456375122, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.5758745074272156, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.5416154265403748, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.507774829864502, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.4760001301765442, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.447267085313797, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.4259222149848938, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.405427485704422, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.3872663080692291, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.368617445230484, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.3497074842453003, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.3325398862361908, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.309400349855423, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.2999594509601593, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.288074254989624, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.27737683057785034, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.26409777998924255, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.26895076036453247, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.2678907513618469, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.26892805099487305, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.28046926856040955, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.2928377091884613, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.33333757519721985, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.3026982843875885, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.3000330328941345, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.5057304501533508, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.45551127195358276, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.6327932476997375, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.9644802808761597, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.7337905764579773, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.8730816841125488, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.9998756051063538, "validation/loss_045_lr3.1e+01_wd1.0e+00": 5.285697937011719, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.2863991260528564, "validation/loss_047_lr4.3e+01_wd1.0e+00": 4.359899997711182, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.0445220470428467, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.44419642857142855, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.46899801587301587, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.5017361111111112, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.5317460317460317, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.5625, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.595734126984127, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.6326884920634921, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.6649305555555556, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.6971726190476191, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.7309027777777778, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.7522321428571429, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.7760416666666666, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.7934027777777778, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.8112599206349206, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.8221726190476191, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.8333333333333334, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.8425099206349206, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.8484623015873016, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.8559027777777778, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.8663194444444444, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.8727678571428571, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.8762400793650794, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.8831845238095238, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.8881448412698413, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.8931051587301587, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9060019841269841, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9087301587301587, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9151785714285714, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9161706349206349, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9241071428571429, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9250992063492064, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.925843253968254, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9263392857142857, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9285714285714286, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9303075396825397, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9201388888888888, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9305555555555556, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9432043650793651, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9223710317460317, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9330357142857143, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9283234126984127, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9169146825396826, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9248511904761905, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9283234126984127, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9365079365079365, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9007936507936508, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9325396825396826, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9362599206349206, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.04365079365079365, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.21189879019102828, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.2437921620429403, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.28637198593403645, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.3280970445460597, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.3760783764646889, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.4331479644520175, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.49861846143875754, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.5559355062078362, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.6175459699782962, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.6691735535543748, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.6990505758357338, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.7311199365516362, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.7516348933920829, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.7775424532933984, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.790577351476552, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.8056613140342679, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.8170544641482681, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.8243790284373541, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.8332824450140192, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.8455575628023401, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.853596948136636, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.8578038127849561, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.8649533851691855, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.8689089921342356, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.8755839682032152, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.8907694134346317, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.8948395944407591, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9019538051223273, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9046218103440001, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9115611763048325, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9131991346430096, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9144181498077892, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9138909707136588, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9177330873913102, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9197127586177695, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9059200595895563, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9173960529319897, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9293632632071908, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9158954542223187, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9204079449671487, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9189044150471704, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9051978946176398, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9126983704836117, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9102893023478099, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9201760225700006, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.8864384770169013, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9236799853222794, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9263545927999539, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.003983342386384211, "id_best": 37, "lr_best": 0.00249, "wd_best": 0.05, "train/loss_best": 0.08266989891417324, "validation/loss_best": 0.3000330328941345, "validation/acc_best": 0.9432043650793651, "validation/f1_best": 0.9293632632071908} +{"epoch": 8, "train/lr": 0.0002612959497061927, "train/loss": 0.5665074537694454, "train/grad": 0.2914376007765532, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.845148413181305, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.750727127790451, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.6151384848356247, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.501274106502533, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.403578678369522, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.2860302263498307, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.170126347541809, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.0581023633480071, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.9364154902100563, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.8362989297509194, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.7609832313656807, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.6758592969179154, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.6135345010459423, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.5449770921468735, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.4943858756124973, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.4552795871347189, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.41386226132512094, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.3739498142153025, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.33672974437475206, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.3074956518411636, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.2761012735962868, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.2480092415586114, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.22017929669469594, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.19272470839321612, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.16135274596512317, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.12980977959930898, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.10802133684046566, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.08903238168917596, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.06901968711055816, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.04902113066986203, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.03567015890963376, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.028246954502537846, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.022628048146143557, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.020304417824372648, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.01984909899532795, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.04025670361705124, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.04337248863652349, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.03695946448482573, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.09623147782869637, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.12944979282096028, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.1786402015388012, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.28979381108656527, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.17977336226962506, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.2092858413606882, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.2660753657389432, "train/loss_045_lr3.1e+01_wd1.0e+00": 1.565056235352531, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.7689253964088857, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.8583031760249287, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.046875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.021359074478968978, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.021044729081913828, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020672934358008207, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020479806857183577, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02044176571071148, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02062639853451401, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.021136595215648413, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02202919644769281, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02335657402873039, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02438223298639059, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.024875630140304566, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02499900364317, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.024796882905066014, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.024315782114863394, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02387070029042661, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.023490342209115625, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.023046499183401466, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.022573714521713556, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.02209196020849049, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.02162579507101327, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.021058101570233703, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.020504691088572145, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.019886092292144896, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.019186124028638004, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0181757913576439, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.01702957950066775, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.01593638421734795, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.014619869433809073, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.012897999754641206, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.011008878058055416, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.009312686172779649, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.008089145025005564, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0072791709011653435, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.006967131907003932, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.006917445579019841, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.012001682902919128, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.011913309281226248, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.010824697429488878, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.021593797977402573, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.024723758244799682, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.02938545740151312, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.041287477948935704, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.030226894811348756, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.03550050080620622, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.041931340563332926, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.1599738373081104, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.1121546076533534, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.13118509406747492, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.8234273195266724, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.7282741069793701, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.592247486114502, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.4782261848449707, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.3802343606948853, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.2616097927093506, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.145124912261963, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.03485107421875, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.9202318787574768, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.8294409513473511, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.7623457908630371, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.6878229379653931, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.6346640586853027, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.5780647993087769, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.5384271144866943, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.5087353587150574, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.4780093729496002, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.45010510087013245, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.42518508434295654, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.4072554409503937, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.3894593417644501, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.3735598921775818, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.3573963940143585, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.34098678827285767, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.32162103056907654, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.3094065487384796, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.3029665946960449, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.28888288140296936, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.27813994884490967, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.26720988750457764, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.27569931745529175, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.2815762758255005, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.29225510358810425, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.2595120966434479, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.27151647210121155, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.363757848739624, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.3242656886577606, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.2959466576576233, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.3502250015735626, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.5273355841636658, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.5131081342697144, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.7257100939750671, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.6497915387153625, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.7495955228805542, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.7231495976448059, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.7558181285858154, "validation/loss_046_lr3.6e+01_wd1.0e+00": 2.339512586593628, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.414980888366699, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.0445220470428467, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.47172619047619047, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.49528769841269843, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.5307539682539683, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.5674603174603174, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.5947420634920635, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.628968253968254, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.6597222222222222, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.6875, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.7219742063492064, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.7477678571428571, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.7703373015873016, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.7904265873015873, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.8065476190476191, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.8234126984126984, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.8325892857142857, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.8410218253968254, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.8509424603174603, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.859375, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.8645833333333334, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.8673115079365079, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.8735119047619048, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.8802083333333334, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.886656746031746, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.8921130952380952, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.8990575396825397, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.90625, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9089781746031746, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9136904761904762, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9196428571428571, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9231150793650794, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9250992063492064, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9241071428571429, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9248511904761905, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9327876984126984, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.935515873015873, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9226190476190477, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9290674603174603, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9422123015873016, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9382440476190477, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9308035714285714, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9404761904761905, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9288194444444444, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9362599206349206, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9412202380952381, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.949156746031746, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9365079365079365, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9372519841269841, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9370039682539683, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.04365079365079365, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.248290643667857, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.27892812214127805, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.32690259472337807, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.3855196251755005, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.42933644370891083, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.4906142718653981, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.5390870509703712, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.5922898269699164, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.6480443463704886, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.6915983610647652, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.7227788597222878, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.7495346181036133, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.7710881583278733, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.7918603760714552, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.8049656975266134, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.8169630069664597, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.8287726355848554, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.838076336691962, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.8441385412629538, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.847960790268961, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.8538642066288485, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.8633369309307222, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.8698618916764101, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.8745191754007977, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.8805539825300531, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.8885049886137457, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.8919417497783706, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.8988419054501358, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9074584224215348, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9119049428857755, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9134187552409427, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9097435288281452, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9108855960254519, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9209358149668044, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.923205117078337, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9028552050836596, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9133371887773495, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9289725513437953, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9256250468601604, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9139829277199818, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9268423407396507, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9059330600286936, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9264701195424078, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.920539146070455, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.940191892128892, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.923155479857759, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9235371424845567, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9304661724624982, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.003983342386384211, "id_best": 44, "lr_best": 0.0078, "wd_best": 0.05, "train/loss_best": 0.2660753657389432, "validation/loss_best": 0.7231495976448059, "validation/acc_best": 0.949156746031746, "validation/f1_best": 0.940191892128892} +{"epoch": 9, "train/lr": 0.00023803200426111704, "train/loss": 0.4899916049838066, "train/grad": 0.20786006674170493, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.7740547752380371, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.678076559305191, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.5418433094024657, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.4279602992534637, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.329630770087242, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.2098848894238472, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.0920063534379005, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.980815287232399, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.8665403857827186, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.7761247509717941, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.7083606167137623, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.631223756223917, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.574145429134369, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.5104219768941403, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.4628847524523735, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.42515668451786043, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.38507413350045683, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.3457460422813892, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.30870215892791747, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.2790720481425524, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.2470813673734665, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.2178726740553975, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.18904946483671664, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.1588739873468876, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.12745930681005121, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.09919848459772766, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.07824885512702168, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.06059380960650742, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.04410988482646644, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.02950881337746978, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.020316544249653817, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.014817002676427365, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.01023965054191649, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.007033504331484437, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00613542826846242, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.02586514866910875, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.022625665292143823, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.019233298590406775, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.04474893412552774, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.07858086809515953, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.12839382118545473, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.13775825344026088, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.10825190391391516, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.12377121132798492, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.14487988163717092, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.5931748674064875, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.37453397010453043, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.5426324717979878, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.046875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.021225368333980442, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020952338338829576, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02069316222332418, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020641777445562184, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02077603027690202, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02123473659157753, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.022101308573037385, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.023250525910407306, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.024475630521774292, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.025115090422332287, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.025260366862639785, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02505560278892517, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.024666788391768932, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.024084294307976963, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02357074309606105, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.023118508686311542, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.022593309939838945, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.022032120325602592, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.021392686804756523, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.020810829717665912, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.02012203233782202, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.01942040977999568, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.018621572935953738, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.01753523625433445, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.016169122247956694, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.01474302601767704, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.013441149911377579, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.01196117304963991, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.010106451070168988, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.007841801682370714, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.006089018045458943, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0048112283486989324, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.003887953623197973, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0032869483581453095, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0030267045214714015, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.008843441888893721, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.00865869931614725, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.007473654555651592, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.013144731868815143, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.02017084464780055, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.02612049170566024, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.026479814727208575, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.02377936282842711, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.025988105838141564, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.03268988059295225, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.08536575630110957, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.06963565678331993, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.09427142607089228, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.7433398962020874, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.6472623348236084, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.5112075805664062, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.3974475860595703, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.2993457317352295, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.1802085638046265, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.0643500089645386, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.9587778449058533, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.8538120985031128, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.7733134031295776, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.7140568494796753, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.6483270525932312, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.6013386845588684, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.5506485104560852, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.5145136713981628, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.48649489879608154, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.4577808380126953, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.4309122860431671, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.40682533383369446, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.388005793094635, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.36785662174224854, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.35120144486427307, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.3369423449039459, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.3196698725223541, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.3019423186779022, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.2920624911785126, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.28512731194496155, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.2753917872905731, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.2736011743545532, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.27128422260284424, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.27817198634147644, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.2831377685070038, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.2743459939956665, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.258881151676178, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.2652027904987335, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.2651582658290863, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.29731833934783936, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.2898428440093994, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.3858339190483093, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.44698819518089294, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.4352901875972748, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.6348264217376709, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.756222128868103, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.6527146100997925, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.7412257194519043, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.108436107635498, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.7623426914215088, "validation/loss_047_lr4.3e+01_wd1.0e+00": 2.346409320831299, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.0445220470428467, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.49007936507936506, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.5163690476190477, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.5577876984126984, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.5895337301587301, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.6185515873015873, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.652281746031746, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.6822916666666666, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.7162698412698413, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.7472718253968254, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.7715773809523809, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.7829861111111112, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.7991071428571429, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.8122519841269841, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.826140873015873, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.8390376984126984, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.847718253968254, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.8563988095238095, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.8628472222222222, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.8717757936507936, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.8787202380952381, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.8829365079365079, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.8893849206349206, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.8938492063492064, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9020337301587301, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9089781746031746, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9107142857142857, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9146825396825397, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9208829365079365, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9253472222222222, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9253472222222222, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9255952380952381, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9260912698412699, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9293154761904762, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.935515873015873, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9377480158730159, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9382440476190477, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9384920634920635, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9441964285714286, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9392361111111112, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9437003968253969, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9481646825396826, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9422123015873016, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9280753968253969, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9454365079365079, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9459325396825397, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9409722222222222, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9499007936507936, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9441964285714286, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.04365079365079365, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.2738678530508072, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.3085703031767659, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.37129483894991155, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.42378701077446723, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.47561335566693974, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.5308772728210442, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.5859258405048285, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.6424294952463471, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.6907475801918859, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.7237311380635264, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.7398223399298047, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.7613918387060339, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.7795958876963054, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.797968172173034, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.8136509805707186, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.8249732364753631, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.8344265256369235, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.8432602922156134, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.8538949513943841, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.8613473531196515, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.8662911727749665, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.8729950760403413, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.8769996650459442, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.8871966456354802, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.8941144532833034, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.896096365721709, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9000283208140628, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9094596485222579, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.913809245415108, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9131272508369915, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9150228125046931, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9144292531405237, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9174162443165808, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9239572968145808, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.928890497352937, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9302965185993479, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9242369832018076, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9338018345400277, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9303214576127277, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9329224887594817, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.941811646770619, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9357327340504983, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9206681918879228, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9344753209596429, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9318015714891085, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9313333626184738, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9422852032695278, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.933724162551234, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.003983342386384211, "id_best": 46, "lr_best": 0.010799999999999999, "wd_best": 0.05, "train/loss_best": 0.37453397010453043, "validation/loss_best": 1.7623426914215088, "validation/acc_best": 0.9499007936507936, "validation/f1_best": 0.9422852032695278} +{"epoch": 10, "train/lr": 0.00021091808096876188, "train/loss": 0.44111885339021684, "train/grad": 0.16455797862261534, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.7031926107406616, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.6072196632623672, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.4722549718618394, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.3595838552713395, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.2619308400154114, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.1424173408746718, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.0255501103401183, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.918712155520916, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.8110695940256119, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.7260462701320648, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.6622433440387249, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.589002822637558, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.5347254370152951, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.47438677966594694, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.4293851552903652, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.39358837977051736, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.35566619150340556, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.3184405217319727, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.2834102086350322, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.25535645853728056, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.2247417876496911, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.19656040906906128, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.16799204332754017, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.13632746059447526, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.10801292976364493, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.08198134830221533, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.06222401554696262, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.045567690087482335, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.03070809572003782, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.018656521225348114, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.011897612381726503, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.008475548578426242, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.004722002418711782, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0039581132121384146, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.0024197604414075615, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.006678078109398484, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.007931983321905136, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.0074221780989319084, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.017929098941385745, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.05449460289441049, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.04820566671900451, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.05097528045997023, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.08159397390671075, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.09300124920904636, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.08496356607414782, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.28950601181946695, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.17639958299696445, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.22041986506432296, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.046875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020989237329922618, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020760675738565625, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020597953372634946, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020667693056166173, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02093451072461903, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02160840782802552, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.022673870320431888, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.023856212878599764, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02487183714285493, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02527948866598308, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.025293694464489817, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.025013259472325446, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02461422810330987, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.024014761210419237, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.023492103083990513, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.022994618280790746, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.022418529782444237, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.02174768808297813, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.0210393168265, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.020424751420505344, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.019684325782582165, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.018887856262736022, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0179643787862733, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.016560613373294474, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.015151805537752807, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.013500301807653159, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.011811071697156877, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.009979544091038406, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.008071743887849153, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.006163495985674672, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.004389228009385988, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.003208922356425319, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0017276699768262915, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0016875073054688982, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0012027970440976787, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.003446187524677953, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.004082406297202397, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.004071407706160244, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.007896149745865841, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.014515711652534265, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.01666554886655376, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.01647608792318579, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.01929768514773059, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.02184936022193142, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.022604956987312335, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.05837426120919597, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.050101629826459514, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.052740354047972865, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.678989052772522, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.5828169584274292, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.4471609592437744, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.333838939666748, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.235336422920227, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.115745186805725, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.001994013786316, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.9024384617805481, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.8059443831443787, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.7317586541175842, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.6773011684417725, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.6160612106323242, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.5720788836479187, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.5253082513809204, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.4915902018547058, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.465821236371994, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.4395245313644409, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.4146694242954254, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.39232853055000305, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.3750379681587219, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.35631904006004333, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.34140291810035706, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.3278132975101471, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.30970221757888794, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.29755401611328125, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.2939974367618561, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.29077595472335815, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.28305789828300476, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.2793121039867401, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.2744322419166565, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.2826210856437683, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.2828420102596283, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.2815447449684143, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.26024049520492554, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.2597314119338989, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.2572880685329437, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.26976117491722107, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.2692364752292633, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.32719555497169495, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.36105281114578247, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.39178934693336487, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.47885292768478394, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.5091056227684021, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.5109494924545288, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.5085480213165283, "validation/loss_045_lr3.1e+01_wd1.0e+00": 1.5924137830734253, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.519539713859558, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.9602326154708862, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.0445220470428467, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.5079365079365079, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.5324900793650794, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.5773809523809523, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.6088789682539683, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.6369047619047619, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.6701388888888888, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.703125, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.7338789682539683, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.761656746031746, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.7802579365079365, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.7926587301587301, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.8134920634920635, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.8216765873015873, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.8328373015873016, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.8425099206349206, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.8541666666666666, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.8628472222222222, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.8695436507936508, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.8757440476190477, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.8812003968253969, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.8874007936507936, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.892609126984127, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.8985615079365079, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9050099206349206, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9107142857142857, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9134424603174603, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9161706349206349, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9221230158730159, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9241071428571429, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9260912698412699, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9246031746031746, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9288194444444444, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9320436507936508, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9350198412698413, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9419642857142857, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9399801587301587, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9461805555555556, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.949156746031746, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9474206349206349, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9548611111111112, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9536210317460317, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9496527777777778, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9469246031746031, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9516369047619048, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.957093253968254, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.951140873015873, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9464285714285714, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9494047619047619, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.04365079365079365, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.2969377004642685, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.33101473348708754, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.40186576783640693, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.458304267330997, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.5033922169914045, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.5622552085128062, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.6215399037057924, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.6707744490705645, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.7115040691273775, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.7358229901166069, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.7518769537311757, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.7764748481632128, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.7882660129253186, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.8050818103443851, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.8168921770386979, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.8307512004109595, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.8421773880239363, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.8515295118792027, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.8588844419813083, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.8648147481229416, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.8722293565321605, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.8764485689889925, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.882370308703583, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.8898392159757068, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.8966299092743183, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.899095801523198, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9026038538242604, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9110581110399261, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9126475555862739, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.91572052263986, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.912648143318618, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.91721118688339, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9199375757402807, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9226349467574299, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9312482436691292, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9302048004875024, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9346191139899682, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9385765672311107, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9386129827590985, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9461995660820316, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9448805785493688, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.941919515253671, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9417360632024082, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9446908530122483, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9499646652412, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9395551455018725, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.940131376864726, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9415879400867792, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.003983342386384211, "id_best": 44, "lr_best": 0.0078, "wd_best": 0.05, "train/loss_best": 0.08496356607414782, "validation/loss_best": 0.5085480213165283, "validation/acc_best": 0.957093253968254, "validation/f1_best": 0.9499646652412} +{"epoch": 11, "train/lr": 0.00018113997589164516, "train/loss": 0.40742840513587, "train/grad": 0.13864329345524312, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.6340484708547591, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.5389824533462524, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.4055965048074723, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.294022724032402, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.1968489238619804, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.0785369098186492, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.9658569052815438, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.8667838031053543, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.7689628468453884, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.6916596806049347, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.6328376786410809, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.5647671307623386, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.5133838833868504, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.455172416716814, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.4108005605638027, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.37529534675180914, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.33702200002968313, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.29956730604171755, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.26417587280273436, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.23564696483314038, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.2046891801431775, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.17638345006853343, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.1470831990800798, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.11484418057836593, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.08811515798792242, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.0635666942037642, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.04627329002134502, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.033075205506756904, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.020821957932785153, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.011317397113889455, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.007184513797983527, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.004898812640458345, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.002905179616063833, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0020098237227648497, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.0013308548461645842, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.0012123522814363242, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.0020718047674745323, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.0013012067042291165, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.00750984376296401, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.01346536010503769, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.028280035369098188, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.031639318680390716, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.03718395376577974, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.05769818640314042, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.03917388813570142, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.10310443828813731, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.07313696396537125, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.06687258558347821, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.046875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020908943330869078, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02071099106222391, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020612616715952756, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02076285790652037, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.021129885241389274, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.021963986549526453, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.023133987700566648, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.024234824292361738, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.024978682147338987, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02512968359515071, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.024959961157292126, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02450084116309881, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02399268434382975, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.023289173925295472, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.022678169258870184, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.02215094376821071, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.021537164421752096, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.02088860890828073, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.020230811010114848, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.019652086389251054, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.01893869853578508, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.018124555656686425, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.016988983126357198, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.015353960902430117, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.01383442321792245, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.01195727605954744, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.010229959696298465, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.008472202547127381, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.006130014245281927, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0038294433715054766, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.002597149831126444, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.001796162793179974, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0009376529227301944, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0007100839818303939, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.00048290974584233484, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0006578750594781013, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0013437849178808393, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0010052360606005095, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.00486727078747208, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.006266311837985086, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.011583936538315812, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.011528877316627302, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.012395510472724709, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.016702318782035944, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.014286074820321062, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.035241628561865974, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.03174211811487211, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.03428487180877582, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.6283762454986572, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.5322908163070679, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.3973039388656616, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.284014105796814, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.1852704286575317, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.0662059783935547, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.9558255672454834, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.8616283535957336, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.7720370292663574, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.7034287452697754, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.6528300046920776, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.5959713459014893, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.5549082159996033, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.5105804204940796, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.4780629873275757, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.4531466066837311, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.42728835344314575, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.40306657552719116, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.38197749853134155, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.365879088640213, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.3489440679550171, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.33475202322006226, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.3202753961086273, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.3045427203178406, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.2956916391849518, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.29232266545295715, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.28995388746261597, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.284129798412323, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.2849206030368805, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.2818889617919922, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.28976431488990784, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.28922349214553833, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.28407999873161316, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.2622629404067993, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.2601469159126282, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.24633413553237915, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.24636726081371307, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.2484053522348404, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.2638523578643799, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.32565924525260925, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.41966304183006287, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.3796217739582062, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.459351122379303, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.46849358081817627, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.46182781457901, "validation/loss_045_lr3.1e+01_wd1.0e+00": 1.3008105754852295, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.1955472230911255, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.5032799243927002, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.0445220470428467, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.5188492063492064, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.5481150793650794, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.5907738095238095, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.6217757936507936, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.6510416666666666, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.6832837301587301, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.7155257936507936, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.7420634920634921, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.7686011904761905, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.7864583333333334, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.8018353174603174, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.8179563492063492, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.8296130952380952, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.8402777777777778, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.8509424603174603, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.8561507936507936, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.8653273809523809, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.8722718253968254, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.8767361111111112, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.8824404761904762, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.8888888888888888, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.8931051587301587, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9005456349206349, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9089781746031746, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9114583333333334, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9156746031746031, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9134424603174603, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9188988095238095, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9201388888888888, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.923859126984127, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9248511904761905, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.925843253968254, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9303075396825397, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9370039682539683, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9432043650793651, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9434523809523809, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.949156746031746, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9541170634920635, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9553571428571429, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9533730158730159, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9521329365079365, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9573412698412699, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9516369047619048, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9528769841269841, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9565972222222222, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9543650793650794, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.951140873015873, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9523809523809523, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.04365079365079365, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.30985926661806795, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.3550205551188401, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.42215913981984243, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.4819332432778498, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.5256342316398642, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.5883067610226753, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.6413618167929185, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.6851257878932, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.7214092605370505, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.7424928667847115, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.7636886975264048, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.785908776383896, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.8016956789384926, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.8155973193045963, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.8280501714707413, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.8347201763195089, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.8445106550596938, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.8520808934761386, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.8579644789988341, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.8648796712716342, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.8718288769217372, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.8761769810757847, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.8829796593322727, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.8922189761500179, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.89533407136999, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9006168575109127, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.8980573008040134, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9055890445251307, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9074681659242261, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9128327933100296, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9141516348024212, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9121704539670391, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9169919454964315, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9252331634691958, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9317882973734859, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9346610901140926, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9386785911563328, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.943794578636453, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9477283073462244, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9438250315525106, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9439278926890147, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9502455967090616, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9420028513371489, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9423441467089608, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9502931821251748, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9468385684354897, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9436247184735559, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.942803331854856, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.003983342386384211, "id_best": 41, "lr_best": 0.0048, "wd_best": 0.05, "train/loss_best": 0.031639318680390716, "validation/loss_best": 0.3796217739582062, "validation/acc_best": 0.9573412698412699, "validation/f1_best": 0.9502455967090616} +{"epoch": 12, "train/lr": 0.0001499999999999999, "train/loss": 0.389238054305315, "train/grad": 0.11973290026187897, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.5979897314310074, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.5026976943016053, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.368934541940689, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.256329460144043, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.1577867063879967, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.038224988281727, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.9268329918384552, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.8310245126485825, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.737942715883255, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.6646471540629864, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.6088367488980293, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.543634571582079, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.49436505943536757, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.43824232324957846, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.39535480834543707, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.3608521308004856, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.3237023065239191, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.2870171952992678, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.2522249659895897, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.22389903362840413, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.19318278145045042, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.16469246469438076, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.13414598640054465, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.10228967473842204, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.07649590217508376, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.05276865119114518, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.03671173272654414, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.025508010881021618, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.01613785618916154, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.00883829165250063, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.005399051159620285, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0037796972971409558, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.002454043412581086, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0016611877642571926, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.0011231731995940209, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.0008057631645351648, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.0005192942451685667, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.0003675961680710316, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.0009453704580664635, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.003697779914364219, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.009234620882198214, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.00845231375657022, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.01142219697125256, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.015694440240040422, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.014375723227858543, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.05375447232276201, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.0373836266528815, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.03340865682810545, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.046875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020659314608201383, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02050555542577058, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020498399212956427, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02075116251129657, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.021242198557592927, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02223654913716018, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.023451280258595945, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.024426844445988537, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.024968311535194517, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.024980047680437566, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.024731816947460173, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.024207641026005148, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.023686590623110532, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.022993650995194913, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.022362325303256513, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.02178954167291522, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.02110670000780374, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.02033475203439593, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.019507580958306788, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.018773816116154193, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.017868410097435117, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.016900082821957766, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.01561045631300658, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.013907863982021809, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.012169955810531975, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.010115827713161708, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.008377208422170952, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.006898902044631541, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.005173270712839439, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0032298572064610198, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0018716982449404895, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.001258534200314898, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0007368892701924779, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0005170925737184007, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0003609804126608651, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0003357910440900014, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0002644730540487217, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.00027175607046956427, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0007764108156015936, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0028034879708593507, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.005871241930639144, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0045103213522173745, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.006517534812268196, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.008435344486811118, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.008011632214904765, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.020063856062314523, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.016293553280768588, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.017913800034784502, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.5890198945999146, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.4933239221572876, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.3586390018463135, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.245307445526123, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.146485447883606, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.0283417701721191, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.9212261438369751, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.8316157460212708, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.746897280216217, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.6818925142288208, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.633963942527771, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.5798373818397522, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.5407394170761108, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.49751436710357666, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.4658081829547882, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.44071415066719055, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.4153291583061218, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.3925631046295166, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.3726406693458557, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.3578372895717621, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.3434446454048157, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.33200594782829285, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.31978538632392883, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.30477648973464966, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.2982766330242157, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.2970019578933716, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.2961517870426178, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.2902942895889282, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.2926252782344818, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.2927660346031189, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.2955802381038666, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.2920209467411041, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.2865009009838104, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.2628956139087677, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.2592190206050873, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.24509289860725403, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.24249032139778137, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.2404627650976181, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.2631063163280487, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.3042466938495636, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.3291468620300293, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.36550021171569824, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.3939536511898041, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.3820178210735321, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.40937739610671997, "validation/loss_045_lr3.1e+01_wd1.0e+00": 1.198994517326355, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.024234652519226, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.1594454050064087, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.0445220470428467, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.529265873015873, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.5620039682539683, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.6006944444444444, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.6331845238095238, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.6649305555555556, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.6927083333333334, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.7291666666666666, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.753968253968254, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.7755456349206349, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.7876984126984127, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.8040674603174603, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.8214285714285714, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.830109126984127, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.8464781746031746, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.8566468253968254, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.8618551587301587, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.8680555555555556, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.8764880952380952, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.8797123015873016, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.8831845238095238, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.8881448412698413, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.8916170634920635, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9005456349206349, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9077380952380952, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9122023809523809, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9112103174603174, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9151785714285714, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9201388888888888, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9201388888888888, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9241071428571429, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9260912698412699, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9265873015873016, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9293154761904762, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9365079365079365, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9432043650793651, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9439484126984127, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9486607142857143, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9543650793650794, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9565972222222222, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9593253968253969, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9583333333333334, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9593253968253969, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9541170634920635, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9588293650793651, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9600694444444444, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9565972222222222, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9548611111111112, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9578373015873016, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.04365079365079365, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.32720001170959573, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.37583990925631094, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.4431393561194194, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.5003949305090474, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.5487354877201077, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.6012037249093731, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.6627245054344639, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.7006569730588936, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.729735952821587, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.7456660303876574, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.7661575446591533, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.7900829560078476, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.8011733142137296, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.8211730586110335, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.8338832186929175, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.8411271717630164, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.8480689812753859, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.8582002548063753, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.8626549655037044, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.865145341318204, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.8697878691701627, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.8736232509057841, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.8833603876264347, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.8928455321070393, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.8975130774712161, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.8957873922343573, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9010950637805712, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9083490536868938, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9089273758789118, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.913333218939802, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9152534042172071, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9132309194699941, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9164176697256345, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9247268759447588, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9323140299051046, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9349716040582754, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9383945868035704, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9441426982111493, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9508585538074131, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9497180595493869, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9504072214129202, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9512837721087276, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9459041381628867, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9515242871078675, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.953017029406641, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.949435432529128, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9498446059383662, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9498329462142036, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.003983342386384211, "id_best": 44, "lr_best": 0.0078, "wd_best": 0.05, "train/loss_best": 0.014375723227858543, "validation/loss_best": 0.40937739610671997, "validation/acc_best": 0.9600694444444444, "validation/f1_best": 0.953017029406641} +{"epoch": 13, "train/lr": 0.00011886002410835478, "train/loss": 0.3727456337213516, "train/grad": 0.11195077929645776, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.5661401426792145, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.4702643448114394, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.3354291236400604, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.2209603148698807, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.1203074431419373, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.9984685918688774, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.8873606994748116, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.7934616321325302, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.7033005137741566, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.632832907885313, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.5795473973453045, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.5175023143738509, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.4704056914150715, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.41639850102365017, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.3744953638315201, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.34053016163408756, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.304061370305717, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.2677799359336495, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.233348003923893, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.2054809083044529, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.17526692368090152, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.14751230489462613, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.11763987066224217, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.08890819495543838, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.06542232456617057, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.04418632745742798, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.029937095167115332, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.020049061542376876, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.012080270955339074, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.006439015930518508, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.00407068707048893, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0029826295282691716, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.002027955027297139, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0014100843388587237, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00097873043268919, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.0007153893820941448, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.0004448629170656204, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.00028885102830827236, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.00023814924992620945, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.00043724462389945986, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.0014926345646381377, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.0014626537449657917, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.003024808568879962, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.007217161795124411, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.002541830698028207, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.0170667028054595, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.01035647832788527, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.015387903470546006, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.046875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020538687175139785, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020444643939845263, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020546921929344535, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020917745186015965, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.021520743141882123, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.022627832437865435, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.023830272797495125, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02467312203720212, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.025048309415578843, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.024967835256829857, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.024701389586552977, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.024190151952207087, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02368151002097875, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02300571831408888, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02239782313350588, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.021829664180986582, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.021146135032176973, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.020352924391627312, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.019498668839223684, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.01869354612659663, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.01771235278341919, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.016638686959631742, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.01506892627105117, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.01328775574453175, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.011506378017365933, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.009334943776484578, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.00747505875537172, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.005812032804824412, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0039692221500445156, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.002149453375604935, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0012790091114584356, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0009215527049673256, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0006002575237653218, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.00042920407075143887, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0003095365694025531, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.00026911607037618526, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.00020271272789614157, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0001629850669132793, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.00018845183034954971, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0005110869399214834, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0008419473574861058, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0013401047118517795, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0025386144186063577, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.004788303224859192, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.002360218280728361, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.011104045503266727, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.006946961775870839, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.009497466946141283, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.5592912435531616, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.4638944864273071, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.3296352624893188, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.2162275314331055, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.1174025535583496, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.0001516342163086, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.8962305188179016, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.8101551532745361, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.7282560467720032, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.6655726432800293, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.6187392473220825, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.5663724541664124, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.528157651424408, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.4864709973335266, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.4559699296951294, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.4324992001056671, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.4088630974292755, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.38709691166877747, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.3677244484424591, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.35268089175224304, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.33750343322753906, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.325542151927948, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.309807151556015, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.29996615648269653, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.2960548400878906, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.2931631803512573, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.2914513349533081, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.28806349635124207, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.29160186648368835, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.29297590255737305, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.29916632175445557, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.2966369390487671, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.2904585003852844, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.2659529447555542, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.2604299783706665, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.2436772882938385, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.2383332997560501, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.23462313413619995, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.25232580304145813, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.2793022394180298, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.2992183566093445, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.3220411539077759, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.3499089777469635, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.3254985213279724, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.38196122646331787, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.8941220641136169, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.8255118727684021, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.9849830269813538, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.0445220470428467, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.5406746031746031, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.5701884920634921, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.6078869047619048, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.6436011904761905, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.6721230158730159, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.7021329365079365, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.7348710317460317, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.7569444444444444, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.7795138888888888, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.7953869047619048, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.8115079365079365, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.8256448412698413, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.8350694444444444, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.8464781746031746, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.8571428571428571, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.8653273809523809, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.8715277777777778, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.8777281746031746, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.8841765873015873, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.888640873015873, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.8936011904761905, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.8965773809523809, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9040178571428571, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9094742063492064, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9144345238095238, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9166666666666666, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9203869047619048, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9228670634920635, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9231150793650794, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9241071428571429, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9250992063492064, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9270833333333334, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9290674603174603, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.935515873015873, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9429563492063492, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9444444444444444, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9479166666666666, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9553571428571429, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9561011904761905, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9585813492063492, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9620535714285714, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9608134920634921, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9573412698412699, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9600694444444444, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9598214285714286, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9588293650793651, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9583333333333334, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9568452380952381, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.04365079365079365, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.3437215758839873, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.39191731785693157, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.45741444363085604, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.514756078337746, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.5589142679220211, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.6191323277548249, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.6704331112492722, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.7043563296134725, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.7341987947467112, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.7541335687805069, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.774644011741125, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.7953606504115671, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.808910011375449, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.8224489454608415, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.8367196090973748, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.845531346983503, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.8521497688868932, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.8596749507835206, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.8666811174421845, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.8719878064300058, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.8771642559479008, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.880329929579612, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.8890802924307947, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.8941288025429088, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.8997926488759156, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.902191218354749, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.907579386771569, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9119958308658015, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9118886363047652, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9133567602022448, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9145553129358076, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9142396987187063, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9161679888995573, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9247693142328961, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9316407667501694, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9357678414455046, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9372657667098702, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9458448355184901, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.949015588615999, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9479496860803871, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9545296730812396, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.952265295409572, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9487927692789259, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9530013264144435, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9543428099864416, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9516343233140384, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9531462091171172, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.951802140672593, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.003983342386384211, "id_best": 40, "lr_best": 0.0042, "wd_best": 0.05, "train/loss_best": 0.0014926345646381377, "validation/loss_best": 0.2992183566093445, "validation/acc_best": 0.9620535714285714, "validation/f1_best": 0.9545296730812396} +{"epoch": 14, "train/lr": 8.9081919031238e-05, "train/loss": 0.36578552290797234, "train/grad": 0.10668076697736978, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.5386028689146043, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.4428799259662628, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.3084093421697616, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.1942702600359916, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.0941351121664047, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.974121299982071, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.8662974533438682, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.7761758305132389, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.6898477014899254, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.6221570605039597, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.5706068648397923, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.5104692857712507, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.46485119476914405, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.4125360156595707, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.3716407908499241, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.3385642109811306, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.3027518259733915, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.26691240128129723, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.23254345174878835, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.20426274731755256, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.17321319900453092, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.14421915562823415, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.11208886226639152, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.08289013701491058, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.05946770140901208, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.038993959948420524, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.025609987853094936, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.01677422464825213, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.010028697904199363, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.00570462460629642, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0037387765292078256, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.002760750437155366, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.001913224784657359, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0013261031731963157, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.0009358500875532627, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.0006656170357018709, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.0004282785952091217, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.00028253517113626005, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.00021450641565024854, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.00013849704526364804, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.00011582804843783378, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.00010401769541203975, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.0002995185740292072, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.0010090840794146061, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.00038241498172283175, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.0038389822747558356, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.0009314605500549078, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.006504365298897028, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.046875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020892016938887537, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020801389347761868, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02088951061014086, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.021250005699694156, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02185338663868606, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02294539990834892, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.024082152899354697, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02479834299534559, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.025016292482614517, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02480703159235418, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.024429124956950547, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02381515116430819, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.023235015710815786, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.022497923923656345, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02183394992724061, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.0212498194584623, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.02055042958352715, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.01974864919669926, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.018895689966157078, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.018111638049595057, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.017150453506037593, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.01604212624952197, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.014373859607148915, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.01252552333753556, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.010563978992868215, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.008252777892630547, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.006292022479465231, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.004604886774322949, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0029266982374247163, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0016294534484040924, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0010541592416120694, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0007650075921264943, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0005243978044018149, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.00037947447468468455, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0002780170506594004, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.00023852703267039033, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.00018330085729758138, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.00014840855459624436, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.00014844686634205573, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.00012983282209688696, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.00013661520954997287, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.00019002669311305454, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0003997851853807788, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.001227449446149933, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0005496851815496484, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.002187381304192229, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0021946179572126397, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0048932217929028325, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.538098692893982, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.4428330659866333, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.3089418411254883, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.1954545974731445, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.0968058109283447, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.9807431101799011, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.8790506720542908, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.7955027222633362, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.7163296341896057, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.6554477214813232, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.6098831295967102, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.5592530965805054, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.5223169922828674, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.48163536190986633, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.4518686830997467, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.42893943190574646, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.4055766761302948, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.3840855360031128, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.36454373598098755, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.3499031960964203, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.33579307794570923, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.3247775137424469, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.30966898798942566, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.3015458881855011, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.298574835062027, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.2963707149028778, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.296017587184906, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.29237183928489685, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.2961794137954712, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.29524993896484375, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.30052611231803894, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.29811692237854004, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.29135656356811523, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.26558732986450195, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.2601921856403351, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.24259983003139496, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.2364167720079422, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.23164141178131104, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.24566835165023804, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.2690371572971344, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.2858119606971741, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.3015833795070648, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.3043266236782074, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.3109246492385864, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.348398894071579, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.7934849858283997, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.7459766268730164, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.9408529996871948, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.0445220470428467, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.548859126984127, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.5773809523809523, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.6168154761904762, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.6510416666666666, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.6763392857142857, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.7078373015873016, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.7395833333333334, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.7619047619047619, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.7837301587301587, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.8025793650793651, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.8152281746031746, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.8291170634920635, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.8368055555555556, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.8479662698412699, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.857390873015873, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.8663194444444444, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.873015873015873, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.8797123015873016, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.8869047619047619, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.8908730158730159, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.8943452380952381, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.8988095238095238, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9017857142857143, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9089781746031746, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9131944444444444, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9156746031746031, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9169146825396826, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9216269841269841, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9221230158730159, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9233630952380952, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9248511904761905, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9270833333333334, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9293154761904762, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9360119047619048, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9432043650793651, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9434523809523809, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9486607142857143, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9556051587301587, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9565972222222222, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9568452380952381, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9625496031746031, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9618055555555556, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9593253968253969, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9627976190476191, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9613095238095238, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9613095238095238, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9610615079365079, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9565972222222222, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.04365079365079365, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.3574375965060055, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.40279307261342273, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.47240883442280174, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.5254275639534265, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.5718269463789067, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.6286691081180446, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.6782552214234088, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.7124811342974732, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.73996037485923, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.7640922430653928, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.7816531903548649, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.8001938170685696, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.811621670739666, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.8250448217411527, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.8368695771445284, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.8468577478426663, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.8551401170796924, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.8638357576299156, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.8719920283563211, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.8758071183662989, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.8793126659077234, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.8840554881400742, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.8853088094989433, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.8935681225840132, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.8985430462879217, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9014274226500069, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9041913914303746, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9110982107634703, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.910667679343992, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9124278475458729, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9134265936410978, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9137492489065725, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9169986584591986, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9246758034653404, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9321772265217563, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9342163469113335, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9387149633700176, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9459575961280796, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9502288973762274, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9466586359462151, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9548017821393106, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9537857088349763, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9515868811038747, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9552202752076566, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9559650807181544, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9543802631684583, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9558787051956524, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9508856962701161, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.003983342386384211, "id_best": 43, "lr_best": 0.006599999999999999, "wd_best": 0.05, "train/loss_best": 0.0010090840794146061, "validation/loss_best": 0.3109246492385864, "validation/acc_best": 0.9627976190476191, "validation/f1_best": 0.9552202752076566} +{"epoch": 15, "train/lr": 6.196799573888289e-05, "train/loss": 0.35693571984767913, "train/grad": 0.10463502638041973, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.5168490087985993, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.421776003241539, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.2877104645967483, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.1737354069948196, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.07348476678133, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.9542694038152695, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.8481889653205872, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.7594033381342888, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.6737514078617096, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.6060330930352211, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.5543495509028434, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.49392266348004343, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.4479205587506294, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.39539240635931494, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.3545276415348053, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.3217678351700306, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.2864351762086153, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.25151819340884685, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.21794010527431965, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.19040214627981186, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.16010340195149184, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.13227277321740985, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.10150812549516558, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.07397447923198343, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.05204362721182406, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.03319539935328066, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.02136718265712261, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.013874765038490295, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.008350187297910452, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.004746341919526458, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0031690526008605955, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.002387879565358162, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0016870474442839622, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0011789128091186285, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.0008541007619351148, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.0006046716682612896, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.00036922263912856577, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.0002687645610421896, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.00020560313947498799, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.00010366159491240978, "train/loss_040_lr1.4e+01_wd1.0e+00": 9.382202289998532e-05, "train/loss_041_lr1.6e+01_wd1.0e+00": 6.83524552732706e-05, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.00011513433419167996, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.00012185217812657356, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.00036997378803789617, "train/loss_045_lr3.1e+01_wd1.0e+00": 4.684498533606529e-05, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.0002492292132228613, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.00026709413155913354, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.046875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02050248089246452, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02045935002155602, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020629381416365503, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.021077856281772257, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.021777246687561272, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.022956929886713624, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02410330679267645, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02477953684516251, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02494333197362721, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02470399072393775, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.024303409419953823, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02369965803809464, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02315489967353642, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02245967331342399, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.021836779620498417, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.02126286846585572, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.020552048231475054, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.019721936690621077, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.01880659972783178, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.017920924257487057, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.016829060944728552, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.015608705929480493, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.013884970345534384, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.011881382956635206, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.009811025613453239, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.007441408017184585, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.005518154299352318, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.003914174584206193, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0024688740022247658, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0013939506901078857, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0009112785921024624, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0006876625858421903, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0004758563690120354, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.00035031239058298526, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0002619981223688228, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.00022239928312046687, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.00016424393881607103, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.00013569458532401767, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0001400063516393857, "train/grad_039_lr1.2e+01_wd1.0e+00": 9.415048504251899e-05, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.00010718166914813309, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.00010030150774696267, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.00014211509115568787, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.00019929944423665802, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.00045489087671796825, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0003749635479583488, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.000853651939117647, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0009402403364836509, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.5237537622451782, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.4285305738449097, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.294623613357544, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.1812963485717773, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.0827618837356567, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.967679500579834, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.8673095703125, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.7852376103401184, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.7076284289360046, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.6478322744369507, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.603403627872467, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.5533449053764343, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.5168164372444153, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.47721901535987854, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.44746965169906616, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.42517128586769104, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.40247243642807007, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.38123443722724915, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.36215588450431824, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.3473159968852997, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.3331184983253479, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.3217598795890808, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.30644020438194275, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.2984398901462555, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.29553574323654175, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.29385679960250854, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.29494383931159973, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.29245954751968384, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.29732218384742737, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.2977920472621918, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.303607702255249, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.2999844551086426, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.29303184151649475, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.26682180166244507, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.2614658772945404, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.24234120547771454, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.23506249487400055, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.2297176718711853, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.24202963709831238, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.2624950110912323, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.27846935391426086, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.2909819483757019, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.2910882532596588, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.2919817864894867, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.3316361606121063, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.7272453904151917, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.7143147587776184, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.8392717838287354, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.0445220470428467, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.5515873015873016, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.5815972222222222, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.6185515873015873, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.6537698412698413, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.6780753968253969, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.7118055555555556, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.7430555555555556, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.7633928571428571, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.7847222222222222, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.800843253968254, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.8157242063492064, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.8268849206349206, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.8375496031746031, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.8482142857142857, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.8591269841269841, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.8687996031746031, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.8742559523809523, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.8807043650793651, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.888640873015873, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.8918650793650794, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.8950892857142857, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9007936507936508, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9067460317460317, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9104662698412699, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9139384920634921, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9159226190476191, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9174107142857143, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9211309523809523, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9236111111111112, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9248511904761905, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9253472222222222, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9275793650793651, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9285714285714286, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9362599206349206, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9432043650793651, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9446924603174603, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.949156746031746, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.955109126984127, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9561011904761905, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9565972222222222, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9623015873015873, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9620535714285714, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9598214285714286, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9623015873015873, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9605654761904762, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9623015873015873, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9593253968253969, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9595734126984127, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.04365079365079365, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.3635982655252648, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.409219652398451, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.4732757424610956, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.5290588403620208, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.5753663072297137, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.633575231888684, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.6843955492620329, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.7147296849563856, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.7411604733788485, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.76001386648048, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.781978848922803, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.7970587743447862, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.8117720946640397, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.8253802387252177, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.8375948607394819, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.8495370790145115, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.8552221302712145, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.8629656116305688, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.8724895860336918, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.8763902491559538, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.878768189872754, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.8851667939130766, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.8917700172085647, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.8962826403821177, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9006373736504126, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9028338580211, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9044405313089414, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9109007718488425, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9129274328165916, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9143195715226524, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.915051935103373, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.914303524714073, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9153032082625187, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9249900214954342, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9323238322237143, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9355749821442746, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9389832447451771, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9456641589149365, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9489765099860735, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9465364223134949, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9546226441341785, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9538538387093206, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9523335532846569, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.954454226242302, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9550030257550003, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9555719057524069, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9534958987814027, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9545499763795158, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.003983342386384211, "id_best": 40, "lr_best": 0.0042, "wd_best": 0.05, "train/loss_best": 9.382202289998532e-05, "validation/loss_best": 0.27846935391426086, "validation/acc_best": 0.9623015873015873, "validation/f1_best": 0.9546226441341785} +{"epoch": 16, "train/lr": 3.870405029380728e-05, "train/loss": 0.35657288894057276, "train/grad": 0.10497785065323115, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.5120200961828232, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.417612288594246, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.284198454618454, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.170315931737423, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.070154978632927, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.9514230611920357, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.8462088963389397, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.7585932826995849, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.674302923232317, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.6078150707483292, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.5567722168564796, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.4970835182070732, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.4514999694377184, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.3988955809175968, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.3582109606266022, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.3249246006458998, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.2891084563732147, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.25314009718596936, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.21875856287777423, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.19040815960615873, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.1592871266975999, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.13021196417510508, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.09837620174512267, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.0707287322729826, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.04875544781796634, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.030652106208726763, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.019592131171375513, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.01278898550197482, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.007681843042373657, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.004524388704448938, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0030257589276880027, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0022927527409046886, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0016142937541007995, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.001175528671592474, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.000826839841902256, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.0005927929282188416, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.000381282577291131, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.0002695245761424303, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.00020583158358931542, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.00011043963953852654, "train/loss_040_lr1.4e+01_wd1.0e+00": 9.592232294380665e-05, "train/loss_041_lr1.6e+01_wd1.0e+00": 7.21915066242218e-05, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.0001299537066370249, "train/loss_043_lr2.2e+01_wd1.0e+00": 9.160947054624557e-05, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.00018311914056539535, "train/loss_045_lr3.1e+01_wd1.0e+00": 8.33483412861824e-06, "train/loss_046_lr3.6e+01_wd1.0e+00": 4.460851661860943e-05, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.0074389651417733e-05, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.046875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020784427374601365, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020746095599606634, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020953052253462376, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02144619229249656, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02218008657451719, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.023384401900693776, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.024485901771113275, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02509305906482041, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02521289170719683, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.024956955956295133, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.024560611685737966, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.023917923914268613, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02332510300911963, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02254152624402195, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02186066440306604, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.02122318590991199, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.02046316840685904, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.019585604653693734, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.01863108697347343, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.01773693744558841, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.016620893944054842, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.015335131105966866, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.013439667767379433, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.011451600501313806, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.00936273216502741, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.007066318896831945, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.005159889301285148, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0036788692761911077, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.002285509309731424, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0013267823946080171, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.000858761042036349, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0006426185875898227, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00044700608654238747, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0003347979888349073, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.00024979731773782987, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.00021239607041934506, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.00015476017750188476, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.00012967779422069726, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0001343774351448701, "train/grad_039_lr1.2e+01_wd1.0e+00": 9.441957525837097e-05, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.00010242783926855736, "train/grad_041_lr1.6e+01_wd1.0e+00": 9.547620757331287e-05, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.00013794100048585277, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.00012466044651148422, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0002290404778464783, "train/grad_045_lr3.1e+01_wd1.0e+00": 3.0621015662665836e-05, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0001537272388622698, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0002803076899475909, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.5146753787994385, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.4198250770568848, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.2860183715820312, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.1725128889083862, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.0740101337432861, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.9593386054039001, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.860005795955658, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.7788622379302979, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.7019864320755005, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.6429540514945984, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.5985291600227356, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.5488028526306152, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.5129499435424805, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.47332194447517395, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.4442399740219116, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.42180946469306946, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.3994542062282562, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.37834998965263367, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.3593965470790863, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.3455384373664856, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.33173805475234985, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.32058143615722656, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.3059559762477875, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.2992899417877197, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.2974548041820526, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.2961386740207672, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.2968895137310028, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.29411429166793823, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.29932716488838196, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.2993675172328949, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.3043913245201111, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.301559180021286, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.2945118546485901, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.2676713466644287, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.26140305399894714, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.24233496189117432, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.2343870848417282, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.2287568300962448, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.2408616989850998, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.25778207182884216, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.2746773362159729, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.2855597138404846, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.2838056981563568, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.2835606038570404, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.32116466760635376, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.7057375311851501, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.6881577372550964, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.7990273833274841, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.0445220470428467, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.5540674603174603, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.5840773809523809, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.6222718253968254, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.6564980158730159, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.6832837301587301, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.7145337301587301, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.7455357142857143, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.767609126984127, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.7862103174603174, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.8080357142857143, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.8187003968253969, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.8311011904761905, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.8412698412698413, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.8504464285714286, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.8601190476190477, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.8700396825396826, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.875, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.8797123015873016, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.8856646825396826, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.8898809523809523, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.8948412698412699, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.8993055555555556, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.90625, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9104662698412699, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9126984126984127, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9146825396825397, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9184027777777778, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9221230158730159, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9228670634920635, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9233630952380952, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9255952380952381, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9273313492063492, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9293154761904762, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9372519841269841, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9439484126984127, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9434523809523809, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.949156746031746, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9556051587301587, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9561011904761905, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.957093253968254, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9625496031746031, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9620535714285714, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9598214285714286, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9632936507936508, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9605654761904762, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9623015873015873, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9590773809523809, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9605654761904762, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.04365079365079365, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.36707336223454845, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.41346938588748927, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.4817904108903553, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.5351409884016487, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.5831045685959131, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.6386222758116995, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.6898099305484956, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.7203340864023189, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.7441031829708052, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.7693123198035426, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.7862352052693043, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.8024939155507521, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.8159292926827717, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.8271939462630294, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.8388652104699887, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.8506891788014144, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.8564947811762975, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.8628860269844254, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.868445865546276, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.8734169692116736, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.8784153791016807, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.8844759313196064, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.8922352517322268, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.8958785177853811, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.8998221759249289, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9016126179160854, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9062215159632323, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9109613930003672, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9119158786933509, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9127367170483994, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9147921250288259, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9143855783080096, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9164999636027231, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.925809625963789, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9328169110262488, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9340941597843585, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9389851906918409, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9464201383051406, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9492417321371251, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9472040693493091, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9549349550256025, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9542315871269542, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9522329723940769, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9559605032633689, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9554362414140256, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9551211855346292, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9530507742790623, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9558271448522394, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.003983342386384211, "id_best": 43, "lr_best": 0.006599999999999999, "wd_best": 0.05, "train/loss_best": 9.160947054624557e-05, "validation/loss_best": 0.2835606038570404, "validation/acc_best": 0.9632936507936508, "validation/f1_best": 0.9559605032633689} +{"epoch": 17, "train/lr": 2.030750443206766e-05, "train/loss": 0.35170712277293203, "train/grad": 0.10166161872446537, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.503106443285942, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.4087221837043762, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.2750625115633012, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.1607341375946998, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.0600013771653176, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.9406249386072159, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.8348772400617599, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.7468497550487518, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.661958564221859, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.5948191797733307, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.5436297170817852, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.48379387870430945, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.4383900681138039, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.3864249999821186, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.3463281536847353, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.3140995066612959, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.27932173177599906, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.24447335269302128, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.21111233331263066, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.18367901612073184, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.1535034516081214, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.12530692985281347, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.09397125953808427, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.0674272451736033, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.04649042211472988, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.029098757514730095, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.01862196274101734, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.012045935988426208, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0072317913640290495, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.004214841471984983, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0028935498371720314, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.002209334531798959, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0015898834075778723, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0011466220114380121, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.0008293191529810429, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.0005895021185278892, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.00036511783488094805, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.00027506493031978605, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.0001928950287401676, "train/loss_039_lr1.2e+01_wd1.0e+00": 9.832408279180526e-05, "train/loss_040_lr1.4e+01_wd1.0e+00": 8.937493897974491e-05, "train/loss_041_lr1.6e+01_wd1.0e+00": 7.91107676923275e-05, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.00012504655867815018, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.00010053354315459729, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.00018067611381411552, "train/loss_045_lr3.1e+01_wd1.0e+00": 1.7985710874199866e-05, "train/loss_046_lr3.6e+01_wd1.0e+00": 4.4002365320920944e-05, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.631199546158314e-05, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.046875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02024966792203486, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020171947428025305, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020317553617060183, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020765354530885814, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.021474475939758122, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.022659183098003268, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.023750956412404777, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02435965825803578, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02447837238200009, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.024203720074146985, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.023815635424107313, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.023195814499631526, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.022626004070043563, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.021885535032488405, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02124012468382716, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.020630561322905125, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.019872827050276102, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.019013517377898095, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.018049545921385288, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.017159606697969138, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.016028959793038665, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.014763900716789067, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.012926188812125474, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.010978636799845844, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.008909239372005686, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.006601943747373298, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.004705359729123302, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0032532688655192033, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.002026134462794289, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0011781679331033957, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0007885858928784728, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0006027487237588502, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00042231331070070156, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0003198902487929445, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.00024023841386224375, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.00019950549023633356, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0001471795614452276, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.00012801088681044348, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0001281461611870327, "train/grad_039_lr1.2e+01_wd1.0e+00": 8.540254813851789e-05, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.00011132197795291177, "train/grad_041_lr1.6e+01_wd1.0e+00": 9.46035071635265e-05, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0001313112347668266, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0001221782768095636, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.00020868938672265357, "train/grad_045_lr3.1e+01_wd1.0e+00": 4.1679019416678875e-05, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.00011495248586941575, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.00010486562796041909, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.5100805759429932, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.4151568412780762, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.281508445739746, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.1681227684020996, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.069698691368103, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.9552748203277588, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.8565384149551392, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.7756677269935608, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.6993303894996643, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.6406203508377075, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.5968624353408813, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.5476359128952026, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.5116811990737915, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.4725185036659241, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.44344693422317505, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.4213016629219055, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.3987649083137512, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.3778940737247467, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.35883215069770813, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.34506332874298096, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.33117687702178955, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.3198625147342682, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.30536168813705444, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.29877790808677673, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.2968371510505676, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.2957228124141693, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.2970288097858429, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.294432133436203, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.3000832498073578, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.3005054295063019, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.30577245354652405, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.3028343617916107, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.29543861746788025, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.26847898960113525, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.2621217668056488, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.2429816573858261, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.23416276276111603, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.22818081080913544, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.23952420055866241, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.2560071051120758, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.2720851004123688, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.2829432487487793, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.28015565872192383, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.2797021269798279, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.31598249077796936, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.6928004026412964, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.6775767207145691, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.7805505990982056, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.0445220470428467, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.5553075396825397, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.5863095238095238, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.6235119047619048, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.6574900793650794, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.6845238095238095, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.7145337301587301, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.7455357142857143, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.7693452380952381, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.7874503968253969, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.8062996031746031, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.8182043650793651, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.8308531746031746, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.8412698412698413, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.8521825396825397, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.8608630952380952, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.8692956349206349, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.8747519841269841, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.8816964285714286, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.8869047619047619, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.890625, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.8960813492063492, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9010416666666666, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9057539682539683, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9117063492063492, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9159226190476191, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9174107142857143, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.919890873015873, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.921875, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9243551587301587, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9241071428571429, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9253472222222222, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9270833333333334, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9298115079365079, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9367559523809523, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9434523809523809, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9437003968253969, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9501488095238095, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9556051587301587, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9558531746031746, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.957093253968254, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9625496031746031, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9613095238095238, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9600694444444444, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9635416666666666, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9608134920634921, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9623015873015873, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9590773809523809, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9600694444444444, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.04365079365079365, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.3687848155690764, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.41659816333524735, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.4839016413298758, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.5359275002255509, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.5863515484857362, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.6395096550797297, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.6891105052000567, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.7220953066817443, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.7453656961984548, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.7670626167988933, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.7862283823883458, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.802293702669634, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.8166223653798403, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.8287546394021035, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.8400807726992429, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.8500073883905427, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.856496933557413, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.8642016487104442, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.8701267585160259, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.8724464928367233, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.87867736662414, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.8845238372229316, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.8903482379803235, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.8964095645098827, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.902101335549664, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9041034405125687, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9079915866773695, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9108790323278768, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9130343006293111, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9134130786699498, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9153135587210505, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9141934222682322, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9169383071051307, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.925261797452149, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9324926676391159, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9342679753919716, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9403747404027063, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9460224040102181, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9490829897975711, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9474016819671647, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9549349550256025, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9534569511213062, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9524517880953061, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9565002903672909, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9555761129762755, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9551768211274989, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9530507742790623, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9552394333484299, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.003983342386384211, "id_best": 43, "lr_best": 0.006599999999999999, "wd_best": 0.05, "train/loss_best": 0.00010053354315459729, "validation/loss_best": 0.2797021269798279, "validation/acc_best": 0.9635416666666666, "validation/f1_best": 0.9565002903672909} +{"epoch": 18, "train/lr": 7.5829098008590064e-06, "train/loss": 0.3489439674466848, "train/grad": 0.1035954974964261, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.4879587334394455, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.3945508575439454, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.262919248342514, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.1506917050480843, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.0523270946741103, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.9359605431556701, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.83296101719141, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.7466987672448159, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.6628842394053936, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.5961012530326844, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.544552167057991, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.4839718520641327, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.4377009366452694, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.3843144316226244, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.3429044806957245, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.3096848855167627, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.2739416526630521, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.23829362384974956, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.2042429844290018, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.1763841137290001, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.14591203531250357, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.11774780338630081, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.08809785940684378, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.06240689549595117, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.04267194287851453, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.026491993851959706, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.016931440997868776, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.010998135209083557, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.006716263657435775, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.003982315398752689, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0027208184450864792, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.002085424866527319, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0015088098030537366, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0011048330552875995, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.0007856968697160482, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.0005703890975564718, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.00036639331839978696, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.0002634087484329939, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.00020599144510924817, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.00010198582895100116, "train/loss_040_lr1.4e+01_wd1.0e+00": 8.741151541471481e-05, "train/loss_041_lr1.6e+01_wd1.0e+00": 7.510373368859291e-05, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.00012409736402332783, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.00010556922294199466, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.000184444272890687, "train/loss_045_lr3.1e+01_wd1.0e+00": 1.4013228937983513e-05, "train/loss_046_lr3.6e+01_wd1.0e+00": 4.6162744984030726e-05, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.7958499267697333e-05, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.046875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02057165420614183, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02052790321409702, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020718738879077138, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.021201516981236637, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.021931922677904368, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.023126872489228845, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02421511387452483, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.024812256293371318, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02492861052043736, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02469136729836464, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.024337407713755966, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.0237681778986007, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.023233364280313255, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02251734004355967, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.021850806828588247, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.021228947560302913, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.020453753722831605, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.019530446492135523, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.01848804364912212, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.017496246229857206, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0162265167478472, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.014782784203998745, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.01280824268469587, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.010713483255822212, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.008523301727836951, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.006180910650873557, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.004359774743788875, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0030199630931019785, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0019016621002811006, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0011343604297144338, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0007623690299806185, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0005779335275292397, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00041337436428875665, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0003105667523777811, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.00023175984861154575, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.00019623756270448212, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.00014562016738636885, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0001246725105738733, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.000124407305747809, "train/grad_039_lr1.2e+01_wd1.0e+00": 8.54193693680827e-05, "train/grad_040_lr1.4e+01_wd1.0e+00": 9.798039612860521e-05, "train/grad_041_lr1.6e+01_wd1.0e+00": 8.599756258021784e-05, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.00012647045124595025, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.00011565753385298194, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0002203038482809916, "train/grad_045_lr3.1e+01_wd1.0e+00": 4.741535102975391e-05, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.00013685087953857301, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.00012164766956539713, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.5083673000335693, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.4134418964385986, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.2797333002090454, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.1664472818374634, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.067962408065796, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.9538447260856628, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.855150043964386, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.7745200991630554, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.6983991861343384, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.6398894190788269, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.5962212681770325, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.5468911528587341, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.5113736987113953, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.47192153334617615, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.442884236574173, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.42070966958999634, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.39823710918426514, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.3774177134037018, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.35878095030784607, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.34435707330703735, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.3304084241390228, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.31974393129348755, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.30535170435905457, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.29957935214042664, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.2980705201625824, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.29687508940696716, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.2983177602291107, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.2956526577472687, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.3008784055709839, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.3010636866092682, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.3058525323867798, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.3031747341156006, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.295291543006897, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.26853829622268677, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.2619611322879791, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.2425485998392105, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.23414215445518494, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.2281898409128189, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.23906449973583221, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.2549700438976288, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.27137985825538635, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.28208282589912415, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.2785477340221405, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.2781159281730652, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.314287006855011, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.6885162591934204, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.6723502278327942, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.7758281826972961, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.0445220470428467, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.5562996031746031, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.5858134920634921, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.6247519841269841, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.6572420634920635, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.6845238095238095, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.7157738095238095, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.7462797619047619, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.7688492063492064, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.7864583333333334, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.8070436507936508, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.8189484126984127, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.8306051587301587, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.8415178571428571, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.8516865079365079, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.8606150793650794, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.8700396825396826, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.8757440476190477, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.8816964285714286, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.886656746031746, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.8918650793650794, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.8948412698412699, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9007936507936508, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9084821428571429, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9122023809523809, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9144345238095238, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9176587301587301, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9188988095238095, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9211309523809523, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9233630952380952, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.923859126984127, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9253472222222222, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9265873015873016, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9298115079365079, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9362599206349206, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9434523809523809, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9437003968253969, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9496527777777778, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9556051587301587, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9561011904761905, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9578373015873016, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9625496031746031, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9618055555555556, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9600694444444444, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9632936507936508, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9608134920634921, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9627976190476191, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9590773809523809, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9605654761904762, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.04365079365079365, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.36965900298037074, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.4164176718576823, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.4858019032599544, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.5365605834862761, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.5875361022092369, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.6406142131330919, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.6905411358840442, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.7213861780758005, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.7440144725648975, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.7678538242410142, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.7865520616849302, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.8018320653042788, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.8169492891483502, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.8284552918681927, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.8393722463260318, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.850915593579449, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.8571929394115626, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.8645114044780439, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.8695670260185808, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.8750357480543594, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.8776623851041284, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.8851721579293135, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.893715183768074, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.8970713352068328, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9001900965478782, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9037748507500484, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.90627785684948, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9101163650395935, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9126072531239989, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9132778724775277, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9149295368819826, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9131362468233604, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9167675532838017, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9245615353257948, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9328774961753987, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9346000124959348, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9398355764741579, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9460224040102181, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9489795889177239, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.948453136324999, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9549349550256025, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9538320685086832, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9527669026071102, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9560105957135567, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9556149048379051, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9555498949103984, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9530507742790623, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.955801293021012, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.003983342386384211, "id_best": 43, "lr_best": 0.006599999999999999, "wd_best": 0.05, "train/loss_best": 0.00010556922294199466, "validation/loss_best": 0.2781159281730652, "validation/acc_best": 0.9632936507936508, "validation/f1_best": 0.9560105957135567} +{"epoch": 19, "train/lr": 1.0867618231465446e-06, "train/loss": 0.34736735343933106, "train/grad": 0.10119434878230095, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.4870960909128188, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.3919364732503892, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.257939648926258, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.1438064938783645, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.043874053657055, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.9257148426771163, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.8218909651041031, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.7355384123325348, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.6525652641057968, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.5872494427859783, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.5373717650771141, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.4789728359878063, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.4345054040849209, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.3830844303965569, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.34288744628429413, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.31038723543286323, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.27497538439929486, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.23937804654240608, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.20538042858242989, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.17763035096228122, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.14744409276172518, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.11952035527676344, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.08976050900295376, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.06407233711332083, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.04408554814755917, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.027444628793746234, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.017237821649760007, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.011137187257409096, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.006726190214976668, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.004016969036310911, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0027564203552901743, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0021227083541452887, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0015346159227192402, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0011203132849186659, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.000799494981765747, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.0005696763936430216, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.0003550797235220671, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.0002623975928872824, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.00019078855402767658, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.00010610025376081467, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.00010186498053371906, "train/loss_041_lr1.6e+01_wd1.0e+00": 7.192803546786308e-05, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.0001259916741400957, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.00010312982834875583, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.00017978223972022533, "train/loss_045_lr3.1e+01_wd1.0e+00": 9.032469242811204e-06, "train/loss_046_lr3.6e+01_wd1.0e+00": 4.84735332429409e-05, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.7244046106934546e-05, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.046875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.0201492103561759, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020110607310198248, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020300639839842916, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020773807116784157, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.021486378721892834, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.022649578894488515, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.023727842890657484, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.024331226595677435, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.024463154594413936, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.024211548957973717, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.023819028260186316, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.023192070429213344, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.022604494583792985, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.021847156840376556, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.021177393440157174, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.02056224899366498, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.019819833585061133, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.018960754950530827, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.017969144815579055, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.01703281367197633, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.01583290323149413, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.01446003895951435, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.01261513568693772, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.010565833759028465, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.008444910782855005, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.006176895335083827, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.004360768302576617, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0030278700380586086, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0018690967760630884, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0011051805361057632, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0007507685710152146, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0005721428791002837, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0004120605706702918, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.00030572493888030293, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.00023420661444106372, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.00019585120786359768, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.00014526995260894182, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0001233656150452589, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.00011960956936036383, "train/grad_039_lr1.2e+01_wd1.0e+00": 8.760384791003162e-05, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.00010488577536250431, "train/grad_041_lr1.6e+01_wd1.0e+00": 8.709846085878325e-05, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.00013587246836294754, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.00012466681870016317, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.00022299713608617822, "train/grad_045_lr3.1e+01_wd1.0e+00": 4.813065383022987e-05, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.00014662304269359188, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.00012464339896519654, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.5081144571304321, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.413201928138733, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.2795655727386475, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.1661721467971802, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.0676847696304321, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.9536360502243042, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.8549833297729492, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.7744340300559998, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.6982680559158325, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.6398221850395203, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.5960316061973572, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.5467963218688965, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.5112390518188477, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.4718162715435028, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.4427856504917145, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.42073264718055725, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.39821726083755493, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.3773743808269501, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.35841822624206543, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.34409722685813904, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.3304266035556793, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.3194490373134613, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.30501508712768555, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.29930758476257324, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.29782283306121826, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.2965228855609894, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.2978161871433258, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.2953307032585144, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.300875723361969, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.30119577050209045, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.30581215023994446, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.3031582832336426, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.2955094873905182, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.26871392130851746, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.2620965242385864, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.24281960725784302, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.23416025936603546, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.22827380895614624, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.23885636031627655, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.2549590766429901, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.2710436284542084, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.2820753753185272, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.2782610356807709, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.2780441343784332, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.31421127915382385, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.6881058812141418, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.671744704246521, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.7742489576339722, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.0445220470428467, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.5565476190476191, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.5858134920634921, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.6247519841269841, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.6572420634920635, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.6845238095238095, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.7165178571428571, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.746031746031746, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.7690972222222222, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.7864583333333334, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.8077876984126984, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.8189484126984127, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.8308531746031746, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.8412698412698413, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.8516865079365079, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.8603670634920635, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.8700396825396826, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.8752480158730159, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.8809523809523809, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.886656746031746, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.8913690476190477, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.894593253968254, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9007936507936508, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.908234126984127, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9124503968253969, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9139384920634921, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9169146825396826, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9184027777777778, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.921875, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9241071428571429, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.923859126984127, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9253472222222222, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9268353174603174, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9298115079365079, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9362599206349206, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9434523809523809, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9434523809523809, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9496527777777778, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9556051587301587, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9561011904761905, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9575892857142857, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9625496031746031, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9620535714285714, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9603174603174603, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9635416666666666, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9603174603174603, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9627976190476191, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9588293650793651, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9600694444444444, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.04365079365079365, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.3698224138471028, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.416337426614632, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.485640323517342, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.5365605834862761, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.5875733526261738, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.6413359408936904, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.6902158419222586, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.7217638830757707, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.7436342248245994, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.7690191096127871, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.7861624426902325, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.8025807542762384, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.8162946026928752, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.8283545609435394, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.8391420376331149, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.8509668226733614, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.8566048155923249, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.8637257848770895, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.8693898129412881, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.8743114937347876, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.877508106708453, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.8848745225853036, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.8928958831089747, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.8974124648834378, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.8997797778850021, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9032477171932559, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9059621624890681, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9107071874091194, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9130669663502962, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9132576006822426, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9151408122611218, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9136397847446376, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9171072412382222, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9245615353257948, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9328774961753987, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9342495016843544, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9398355764741579, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9460224040102181, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9489795889177239, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9480666044852049, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9549349550256025, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9540368615769598, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9529889423013722, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9561333052343192, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9547230826000911, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9555498949103984, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.952872013721553, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9550590708904222, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.003983342386384211, "id_best": 43, "lr_best": 0.006599999999999999, "wd_best": 0.05, "train/loss_best": 0.00010312982834875583, "validation/loss_best": 0.2780441343784332, "validation/acc_best": 0.9635416666666666, "validation/f1_best": 0.9561333052343192} diff --git a/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/config.yaml b/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b6836b482c2d3231a8a76ae54a26ff410fd27b59 --- /dev/null +++ b/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/config.yaml @@ -0,0 +1,96 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_probe +remote_root: null +notes: data scaling experiment n100_2; eval v2 (nsd_cococlip patch attn) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: true + norm: true +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn +model: flat_mae +representation: patch +classifier: attn +dataset: nsd_cococlip +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn +remote_dir: null diff --git a/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/eval_log.json b/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/eval_log.json new file mode 100644 index 0000000000000000000000000000000000000000..b97713aa6e2e91a0302481072e2cf6a52776ec06 --- /dev/null +++ b/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/eval_log.json @@ -0,0 +1 @@ +{"eval/epoch": 17, "eval/id_best": 17, "eval/lr_best": 9.599999999999999e-05, "eval/wd_best": 0.05, "eval/train/loss": 2.2801589965820312, "eval/train/acc": 0.3174037309075263, "eval/train/acc_std": 0.0021720362405662453, "eval/train/f1": 0.24996921159420885, "eval/train/f1_std": 0.002174505316110855, "eval/validation/loss": 2.5121238231658936, "eval/validation/acc": 0.25212255444813586, "eval/validation/acc_std": 0.005558349328010697, "eval/validation/f1": 0.178891054967223, "eval/validation/f1_std": 0.0046179386179950035, "eval/test/loss": 2.4678409099578857, "eval/test/acc": 0.25918367346938775, "eval/test/acc_std": 0.005123598094173278, "eval/test/f1": 0.18567873372894725, "eval/test/f1_std": 0.004625389002219636, "eval/testid/loss": 2.4389657974243164, "eval/testid/acc": 0.2614227877385772, "eval/testid/acc_std": 0.005253431050437182, "eval/testid/f1": 0.1981972157232437, "eval/testid/f1_std": 0.00478330422356263} diff --git a/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/eval_log_best.json b/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/eval_log_best.json new file mode 100644 index 0000000000000000000000000000000000000000..733a31544ac19a71e6a9a06d2a1826f153e924b5 --- /dev/null +++ b/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/eval_log_best.json @@ -0,0 +1 @@ +{"eval/best/epoch": 17, "eval/best/id_best": 17, "eval/best/lr_best": 9.599999999999999e-05, "eval/best/wd_best": 0.05, "eval/best/train/loss": 2.2801589965820312, "eval/best/train/acc": 0.3174037309075263, "eval/best/train/acc_std": 0.0021720362405662453, "eval/best/train/f1": 0.24996921159420885, "eval/best/train/f1_std": 0.002174505316110855, "eval/best/validation/loss": 2.5121238231658936, "eval/best/validation/acc": 0.25212255444813586, "eval/best/validation/acc_std": 0.005558349328010697, "eval/best/validation/f1": 0.178891054967223, "eval/best/validation/f1_std": 0.0046179386179950035, "eval/best/test/loss": 2.4678409099578857, "eval/best/test/acc": 0.25918367346938775, "eval/best/test/acc_std": 0.005123598094173278, "eval/best/test/f1": 0.18567873372894725, "eval/best/test/f1_std": 0.004625389002219636, "eval/best/testid/loss": 2.4389657974243164, "eval/best/testid/acc": 0.2614227877385772, "eval/best/testid/acc_std": 0.005253431050437182, "eval/best/testid/f1": 0.1981972157232437, "eval/best/testid/f1_std": 0.00478330422356263} diff --git a/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/eval_log_last.json b/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/eval_log_last.json new file mode 100644 index 0000000000000000000000000000000000000000..96b4797fdc5acfccd3d222e80dee451a64fd46cf --- /dev/null +++ b/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/eval_log_last.json @@ -0,0 +1 @@ +{"eval/last/epoch": 19, "eval/last/id_best": 17, "eval/last/lr_best": 9.599999999999999e-05, "eval/last/wd_best": 0.05, "eval/last/train/loss": 2.278407096862793, "eval/last/train/acc": 0.31801837794646426, "eval/last/train/acc_std": 0.002194595491474176, "eval/last/train/f1": 0.2495535751701682, "eval/last/train/f1_std": 0.0021728134707087207, "eval/last/validation/loss": 2.5161631107330322, "eval/last/validation/acc": 0.2502768549280177, "eval/last/validation/acc_std": 0.0054280929764676635, "eval/last/validation/f1": 0.1774014094554318, "eval/last/validation/f1_std": 0.004530486232532907, "eval/last/test/loss": 2.4701080322265625, "eval/last/test/acc": 0.2560296846011132, "eval/last/test/acc_std": 0.005052262139712686, "eval/last/test/f1": 0.1818746620569582, "eval/last/test/f1_std": 0.00453373808990997, "eval/last/testid/loss": 2.4380226135253906, "eval/last/testid/acc": 0.26045883940620784, "eval/last/testid/acc_std": 0.005337878665213572, "eval/last/testid/f1": 0.19665621106494888, "eval/last/testid/f1_std": 0.004862487025304217} diff --git a/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/eval_table.csv b/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..09d28fab2e7a21f2ba425f18ff68cad34cc47352 --- /dev/null +++ b/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/eval_table.csv @@ -0,0 +1,5 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,nsd_cococlip,best,17,9.599999999999999e-05,0.05,17,"[0.32, 1.0]",train,2.2801589965820312,0.3174037309075263,0.0021720362405662453,0.24996921159420885,0.002174505316110855 +flat_mae,patch,attn,nsd_cococlip,best,17,9.599999999999999e-05,0.05,17,"[0.32, 1.0]",validation,2.5121238231658936,0.25212255444813586,0.005558349328010697,0.178891054967223,0.0046179386179950035 +flat_mae,patch,attn,nsd_cococlip,best,17,9.599999999999999e-05,0.05,17,"[0.32, 1.0]",test,2.4678409099578857,0.25918367346938775,0.005123598094173278,0.18567873372894725,0.004625389002219636 +flat_mae,patch,attn,nsd_cococlip,best,17,9.599999999999999e-05,0.05,17,"[0.32, 1.0]",testid,2.4389657974243164,0.2614227877385772,0.005253431050437182,0.1981972157232437,0.00478330422356263 diff --git a/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/eval_table_best.csv b/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/eval_table_best.csv new file mode 100644 index 0000000000000000000000000000000000000000..09d28fab2e7a21f2ba425f18ff68cad34cc47352 --- /dev/null +++ b/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/eval_table_best.csv @@ -0,0 +1,5 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,nsd_cococlip,best,17,9.599999999999999e-05,0.05,17,"[0.32, 1.0]",train,2.2801589965820312,0.3174037309075263,0.0021720362405662453,0.24996921159420885,0.002174505316110855 +flat_mae,patch,attn,nsd_cococlip,best,17,9.599999999999999e-05,0.05,17,"[0.32, 1.0]",validation,2.5121238231658936,0.25212255444813586,0.005558349328010697,0.178891054967223,0.0046179386179950035 +flat_mae,patch,attn,nsd_cococlip,best,17,9.599999999999999e-05,0.05,17,"[0.32, 1.0]",test,2.4678409099578857,0.25918367346938775,0.005123598094173278,0.18567873372894725,0.004625389002219636 +flat_mae,patch,attn,nsd_cococlip,best,17,9.599999999999999e-05,0.05,17,"[0.32, 1.0]",testid,2.4389657974243164,0.2614227877385772,0.005253431050437182,0.1981972157232437,0.00478330422356263 diff --git a/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/eval_table_last.csv b/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/eval_table_last.csv new file mode 100644 index 0000000000000000000000000000000000000000..42216803c13ef8e76704a9935cf8c39ccc31d4db --- /dev/null +++ b/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/eval_table_last.csv @@ -0,0 +1,5 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,nsd_cococlip,last,19,9.599999999999999e-05,0.05,17,"[0.32, 1.0]",train,2.278407096862793,0.31801837794646426,0.002194595491474176,0.2495535751701682,0.0021728134707087207 +flat_mae,patch,attn,nsd_cococlip,last,19,9.599999999999999e-05,0.05,17,"[0.32, 1.0]",validation,2.5161631107330322,0.2502768549280177,0.0054280929764676635,0.1774014094554318,0.004530486232532907 +flat_mae,patch,attn,nsd_cococlip,last,19,9.599999999999999e-05,0.05,17,"[0.32, 1.0]",test,2.4701080322265625,0.2560296846011132,0.005052262139712686,0.1818746620569582,0.00453373808990997 +flat_mae,patch,attn,nsd_cococlip,last,19,9.599999999999999e-05,0.05,17,"[0.32, 1.0]",testid,2.4380226135253906,0.26045883940620784,0.005337878665213572,0.19665621106494888,0.004862487025304217 diff --git a/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/log.txt b/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..2804f8af20c913052e31b1137539c4148ce6cd15 --- /dev/null +++ b/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/log.txt @@ -0,0 +1,967 @@ +fMRI foundation model probe eval +version: 0.1.dev65+g4003a1397 +sha: 6c01b606db98add5848cecd23e5d599250c0bf86, status: clean, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-24 19:52:41 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_probe +remote_root: null +notes: data scaling experiment n100_2; eval v2 (nsd_cococlip patch attn) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: true + norm: true +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn +model: flat_mae +representation: patch +classifier: attn +dataset: nsd_cococlip +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: nsd_cococlip (flat) +train (n=32539): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'run', 'trial_id', 'nsd_id', 'category_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 32539 +}), + labels=[ 3 4 5 6 10 11 17 18 19 20 22 23 25 30 31 33 36 37 38 53 55 59 61 74], + counts=[1286 1180 1639 1868 834 824 1026 1042 913 1853 1503 2092 1001 1410 + 794 1241 1904 1872 2267 1428 889 904 1447 1322] +) + +validation (n=5418): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'run', 'trial_id', 'nsd_id', 'category_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 5418 +}), + labels=[ 3 4 5 6 10 11 17 18 19 20 22 23 25 30 31 33 36 37 38 53 55 59 61 74], + counts=[197 161 276 345 126 142 143 185 112 295 285 387 169 250 159 193 316 334 + 343 215 172 141 226 246] +) + +test (n=5390): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'run', 'trial_id', 'nsd_id', 'category_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 5390 +}), + labels=[ 3 4 5 6 10 11 17 18 19 20 22 23 25 30 31 33 36 37 38 53 55 59 61 74], + counts=[202 172 274 298 144 180 134 182 186 293 218 343 165 185 140 177 346 333 + 345 271 165 140 251 246] +) + +testid (n=5187): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'run', 'trial_id', 'nsd_id', 'category_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 5187 +}), + labels=[ 3 4 5 6 10 11 17 18 19 20 22 23 25 30 31 33 36 37 38 53 55 59 61 74], + counts=[197 159 267 273 123 153 175 184 139 310 215 386 153 230 118 192 330 306 + 349 223 143 127 249 186] +) + +running backbone on example batch to get embedding dim +embedding feature dim (patch): 768 +initializing sweep of classifier heads +classifiers: +ModuleList( + (0-48): 49 x AttnPoolClassifier( + (kv): Linear(in_features=768, out_features=1536, bias=True) + (linear): Linear(in_features=768, out_features=24, bias=True) + ) +) +classifier params (train): 58.8M (58.8M) +setting up optimizer +total batch size: 128 = 64 bs per gpu x 2 accum +lr: 3.00e-04 +full schedule: epochs = 20 (steps = 4000) (decay = True) +warmup: epochs = 5 (steps = 1000) +start training for 20 epochs +train: [0] [ 0/400] eta: 0:23:59 lr: nan time: 3.5979 data: 3.0040 max mem: 21740 +train: [0] [ 20/400] eta: 0:03:50 lr: 0.000003 loss: 3.1987 (3.2031) grad: 0.1707 (0.1773) time: 0.4574 data: 0.0042 max mem: 22448 +train: [0] [ 40/400] eta: 0:03:11 lr: 0.000006 loss: 3.1899 (3.1878) grad: 0.1707 (0.1749) time: 0.4515 data: 0.0043 max mem: 22448 +train: [0] [ 60/400] eta: 0:02:51 lr: 0.000009 loss: 3.1775 (3.1879) grad: 0.1686 (0.1740) time: 0.4535 data: 0.0042 max mem: 22448 +train: [0] [ 80/400] eta: 0:02:37 lr: 0.000012 loss: 3.1726 (3.1833) grad: 0.1663 (0.1718) time: 0.4541 data: 0.0041 max mem: 22448 +train: [0] [100/400] eta: 0:02:25 lr: 0.000015 loss: 3.1695 (3.1806) grad: 0.1597 (0.1694) time: 0.4595 data: 0.0041 max mem: 22448 +train: [0] [120/400] eta: 0:02:14 lr: 0.000018 loss: 3.1566 (3.1774) grad: 0.1526 (0.1666) time: 0.4447 data: 0.0042 max mem: 22448 +train: [0] [140/400] eta: 0:02:08 lr: 0.000021 loss: 3.1592 (3.1761) grad: 0.1530 (0.1660) time: 0.5738 data: 0.1236 max mem: 22448 +train: [0] [160/400] eta: 0:01:57 lr: 0.000024 loss: 3.1560 (3.1717) grad: 0.1716 (0.1668) time: 0.4584 data: 0.0058 max mem: 22448 +train: [0] [180/400] eta: 0:01:46 lr: 0.000027 loss: 3.1333 (3.1684) grad: 0.1640 (0.1661) time: 0.4455 data: 0.0041 max mem: 22448 +train: [0] [200/400] eta: 0:01:36 lr: 0.000030 loss: 3.1582 (3.1678) grad: 0.1523 (0.1646) time: 0.4840 data: 0.0044 max mem: 22448 +train: [0] [220/400] eta: 0:01:26 lr: 0.000033 loss: 3.1699 (3.1678) grad: 0.1521 (0.1636) time: 0.4611 data: 0.0043 max mem: 22448 +train: [0] [240/400] eta: 0:01:16 lr: 0.000036 loss: 3.1600 (3.1669) grad: 0.1539 (0.1627) time: 0.4457 data: 0.0042 max mem: 22448 +train: [0] [260/400] eta: 0:01:06 lr: 0.000039 loss: 3.1523 (3.1657) grad: 0.1473 (0.1615) time: 0.4608 data: 0.0045 max mem: 22448 +train: [0] [280/400] eta: 0:00:57 lr: 0.000042 loss: 3.1507 (3.1645) grad: 0.1453 (0.1606) time: 0.4524 data: 0.0044 max mem: 22448 +train: [0] [300/400] eta: 0:00:47 lr: 0.000045 loss: 3.1404 (3.1621) grad: 0.1466 (0.1603) time: 0.4484 data: 0.0044 max mem: 22448 +train: [0] [320/400] eta: 0:00:37 lr: 0.000048 loss: 3.1289 (3.1605) grad: 0.1577 (0.1604) time: 0.4622 data: 0.0042 max mem: 22448 +train: [0] [340/400] eta: 0:00:28 lr: 0.000051 loss: 3.1386 (3.1589) grad: 0.1580 (0.1604) time: 0.4576 data: 0.0044 max mem: 22448 +train: [0] [360/400] eta: 0:00:18 lr: 0.000054 loss: 3.1250 (3.1568) grad: 0.1586 (0.1608) time: 0.4520 data: 0.0040 max mem: 22448 +train: [0] [380/400] eta: 0:00:09 lr: 0.000057 loss: 3.1169 (3.1544) grad: 0.1686 (0.1612) time: 0.4790 data: 0.0045 max mem: 22448 +train: [0] [399/400] eta: 0:00:00 lr: 0.000060 loss: 3.1137 (3.1531) grad: 0.1686 (0.1617) time: 0.4581 data: 0.0043 max mem: 22448 +train: [0] Total time: 0:03:08 (0.4711 s / it) +train: [0] Summary: lr: 0.000060 loss: 3.1137 (3.1531) grad: 0.1686 (0.1617) +eval (validation): [0] [ 0/85] eta: 0:04:32 time: 3.2020 data: 2.9252 max mem: 22448 +eval (validation): [0] [20/85] eta: 0:00:32 time: 0.3634 data: 0.0057 max mem: 22448 +eval (validation): [0] [40/85] eta: 0:00:18 time: 0.3360 data: 0.0035 max mem: 22448 +eval (validation): [0] [60/85] eta: 0:00:09 time: 0.3409 data: 0.0042 max mem: 22448 +eval (validation): [0] [80/85] eta: 0:00:01 time: 0.3910 data: 0.0471 max mem: 22448 +eval (validation): [0] [84/85] eta: 0:00:00 time: 0.3882 data: 0.0466 max mem: 22448 +eval (validation): [0] Total time: 0:00:33 (0.3933 s / it) +cv: [0] best hparam: (36, 1.0) (046) ('046_lr3.6e+01_wd1.0e+00') loss: 2.719 acc: 0.203 f1: 0.122 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [1] [ 0/400] eta: 0:22:54 lr: nan time: 3.4369 data: 3.0610 max mem: 22448 +train: [1] [ 20/400] eta: 0:03:53 lr: 0.000063 loss: 3.0796 (3.0783) grad: 0.1695 (0.1722) time: 0.4725 data: 0.0188 max mem: 22448 +train: [1] [ 40/400] eta: 0:03:12 lr: 0.000066 loss: 3.0888 (3.0804) grad: 0.1626 (0.1666) time: 0.4491 data: 0.0039 max mem: 22448 +train: [1] [ 60/400] eta: 0:02:52 lr: 0.000069 loss: 3.0604 (3.0659) grad: 0.1609 (0.1667) time: 0.4524 data: 0.0040 max mem: 22448 +train: [1] [ 80/400] eta: 0:02:38 lr: 0.000072 loss: 3.0533 (3.0651) grad: 0.1674 (0.1689) time: 0.4584 data: 0.0043 max mem: 22448 +train: [1] [100/400] eta: 0:02:25 lr: 0.000075 loss: 3.0541 (3.0613) grad: 0.1739 (0.1714) time: 0.4496 data: 0.0041 max mem: 22448 +train: [1] [120/400] eta: 0:02:14 lr: 0.000078 loss: 3.0541 (3.0589) grad: 0.1822 (0.1731) time: 0.4484 data: 0.0041 max mem: 22448 +train: [1] [140/400] eta: 0:02:03 lr: 0.000081 loss: 3.0510 (3.0572) grad: 0.1789 (0.1747) time: 0.4502 data: 0.0042 max mem: 22448 +train: [1] [160/400] eta: 0:01:53 lr: 0.000084 loss: 3.0480 (3.0577) grad: 0.1778 (0.1750) time: 0.4410 data: 0.0041 max mem: 22448 +train: [1] [180/400] eta: 0:01:43 lr: 0.000087 loss: 3.0480 (3.0568) grad: 0.1772 (0.1757) time: 0.4641 data: 0.0042 max mem: 22448 +train: [1] [200/400] eta: 0:01:34 lr: 0.000090 loss: 3.0251 (3.0544) grad: 0.1778 (0.1766) time: 0.4679 data: 0.0042 max mem: 22448 +train: [1] [220/400] eta: 0:01:24 lr: 0.000093 loss: 2.9974 (3.0484) grad: 0.1917 (0.1783) time: 0.4447 data: 0.0043 max mem: 22448 +train: [1] [240/400] eta: 0:01:14 lr: 0.000096 loss: 2.9974 (3.0449) grad: 0.1917 (0.1787) time: 0.4575 data: 0.0041 max mem: 22448 +train: [1] [260/400] eta: 0:01:05 lr: 0.000099 loss: 3.0142 (3.0436) grad: 0.1875 (0.1797) time: 0.4552 data: 0.0042 max mem: 22448 +train: [1] [280/400] eta: 0:00:55 lr: 0.000102 loss: 3.0136 (3.0404) grad: 0.1912 (0.1804) time: 0.4425 data: 0.0043 max mem: 22448 +train: [1] [300/400] eta: 0:00:46 lr: 0.000105 loss: 2.9956 (3.0387) grad: 0.1912 (0.1811) time: 0.4564 data: 0.0043 max mem: 22448 +train: [1] [320/400] eta: 0:00:37 lr: 0.000108 loss: 2.9807 (3.0352) grad: 0.1932 (0.1821) time: 0.4507 data: 0.0042 max mem: 22448 +train: [1] [340/400] eta: 0:00:27 lr: 0.000111 loss: 2.9755 (3.0317) grad: 0.1932 (0.1826) time: 0.4563 data: 0.0041 max mem: 22448 +train: [1] [360/400] eta: 0:00:18 lr: 0.000114 loss: 3.0016 (3.0294) grad: 0.1877 (0.1829) time: 0.4655 data: 0.0042 max mem: 22448 +train: [1] [380/400] eta: 0:00:09 lr: 0.000117 loss: 2.9677 (3.0253) grad: 0.1877 (0.1837) time: 0.4582 data: 0.0041 max mem: 22448 +train: [1] [399/400] eta: 0:00:00 lr: 0.000120 loss: 2.9632 (3.0230) grad: 0.1986 (0.1848) time: 0.4408 data: 0.0043 max mem: 22448 +train: [1] Total time: 0:03:04 (0.4618 s / it) +train: [1] Summary: lr: 0.000120 loss: 2.9632 (3.0230) grad: 0.1986 (0.1848) +eval (validation): [1] [ 0/85] eta: 0:04:40 time: 3.3015 data: 3.0642 max mem: 22448 +eval (validation): [1] [20/85] eta: 0:00:36 time: 0.4280 data: 0.0925 max mem: 22448 +eval (validation): [1] [40/85] eta: 0:00:23 time: 0.4744 data: 0.1348 max mem: 22448 +eval (validation): [1] [60/85] eta: 0:00:11 time: 0.3246 data: 0.0042 max mem: 22448 +eval (validation): [1] [80/85] eta: 0:00:02 time: 0.3223 data: 0.0034 max mem: 22448 +eval (validation): [1] [84/85] eta: 0:00:00 time: 0.3093 data: 0.0039 max mem: 22448 +eval (validation): [1] Total time: 0:00:35 (0.4215 s / it) +cv: [1] best hparam: (12, 1.0) (039) ('039_lr1.2e+01_wd1.0e+00') loss: 2.597 acc: 0.226 f1: 0.154 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [2] [ 0/400] eta: 0:36:27 lr: nan time: 5.4688 data: 5.0898 max mem: 22448 +train: [2] [ 20/400] eta: 0:04:31 lr: 0.000123 loss: 2.9553 (2.9600) grad: 0.2186 (0.2173) time: 0.4776 data: 0.0436 max mem: 22448 +train: [2] [ 40/400] eta: 0:03:33 lr: 0.000126 loss: 2.9635 (2.9628) grad: 0.2138 (0.2164) time: 0.4619 data: 0.0051 max mem: 22448 +train: [2] [ 60/400] eta: 0:03:04 lr: 0.000129 loss: 2.9602 (2.9610) grad: 0.2080 (0.2148) time: 0.4455 data: 0.0038 max mem: 22448 +train: [2] [ 80/400] eta: 0:02:46 lr: 0.000132 loss: 2.9472 (2.9610) grad: 0.2254 (0.2195) time: 0.4479 data: 0.0044 max mem: 22448 +train: [2] [100/400] eta: 0:02:31 lr: 0.000135 loss: 2.9469 (2.9604) grad: 0.2607 (0.2368) time: 0.4439 data: 0.0042 max mem: 22448 +train: [2] [120/400] eta: 0:02:18 lr: 0.000138 loss: 3.0107 (3.0015) grad: 0.3478 (0.3268) time: 0.4464 data: 0.0043 max mem: 22448 +WARNING: classifier 48 (50, 1.0) diverged (loss=84.11 > 63.56) at step 468. Freezing. +train: [2] [140/400] eta: 0:02:06 lr: 0.000141 loss: 3.3472 (3.0999) grad: 0.9542 (0.4773) time: 0.4457 data: 0.0043 max mem: 22448 +train: [2] [160/400] eta: 0:01:56 lr: 0.000144 loss: 2.9770 (3.0786) grad: 0.2315 (0.4453) time: 0.4703 data: 0.0042 max mem: 22448 +train: [2] [180/400] eta: 0:01:46 lr: 0.000147 loss: 2.9154 (3.0601) grad: 0.2209 (0.4207) time: 0.4568 data: 0.0042 max mem: 22448 +train: [2] [200/400] eta: 0:01:35 lr: 0.000150 loss: 2.9036 (3.0450) grad: 0.2232 (0.4018) time: 0.4438 data: 0.0043 max mem: 22448 +train: [2] [220/400] eta: 0:01:26 lr: 0.000153 loss: 2.9191 (3.0368) grad: 0.2449 (0.3886) time: 0.4678 data: 0.0043 max mem: 22448 +train: [2] [240/400] eta: 0:01:16 lr: 0.000156 loss: 2.9829 (3.0334) grad: 0.2603 (0.3808) time: 0.4551 data: 0.0041 max mem: 22448 +train: [2] [260/400] eta: 0:01:06 lr: 0.000159 loss: 3.0398 (3.0427) grad: 0.3467 (0.4113) time: 0.4401 data: 0.0042 max mem: 22448 +WARNING: classifier 47 (43, 1.0) diverged (loss=75.94 > 63.56) at step 535. Freezing. +train: [2] [280/400] eta: 0:00:56 lr: 0.000162 loss: 3.1504 (3.0663) grad: 0.7249 (0.4424) time: 0.4525 data: 0.0042 max mem: 22448 +train: [2] [300/400] eta: 0:00:47 lr: 0.000165 loss: 2.9308 (3.0547) grad: 0.2201 (0.4270) time: 0.4511 data: 0.0041 max mem: 22448 +train: [2] [320/400] eta: 0:00:37 lr: 0.000168 loss: 2.8853 (3.0443) grad: 0.2058 (0.4130) time: 0.4572 data: 0.0042 max mem: 22448 +train: [2] [340/400] eta: 0:00:28 lr: 0.000171 loss: 2.8867 (3.0369) grad: 0.2110 (0.4016) time: 0.4614 data: 0.0042 max mem: 22448 +train: [2] [360/400] eta: 0:00:18 lr: 0.000174 loss: 2.9032 (3.0303) grad: 0.2168 (0.3914) time: 0.4611 data: 0.0044 max mem: 22448 +train: [2] [380/400] eta: 0:00:09 lr: 0.000177 loss: 2.9286 (3.0261) grad: 0.2368 (0.3860) time: 0.4902 data: 0.0042 max mem: 22448 +train: [2] [399/400] eta: 0:00:00 lr: 0.000180 loss: 3.0245 (3.0328) grad: 0.3758 (0.4053) time: 0.4575 data: 0.0043 max mem: 22448 +train: [2] Total time: 0:03:07 (0.4695 s / it) +train: [2] Summary: lr: 0.000180 loss: 3.0245 (3.0328) grad: 0.3758 (0.4053) +eval (validation): [2] [ 0/85] eta: 0:04:39 time: 3.2832 data: 3.0413 max mem: 22448 +eval (validation): [2] [20/85] eta: 0:00:31 time: 0.3383 data: 0.0037 max mem: 22448 +eval (validation): [2] [40/85] eta: 0:00:18 time: 0.3387 data: 0.0040 max mem: 22448 +eval (validation): [2] [60/85] eta: 0:00:09 time: 0.3437 data: 0.0042 max mem: 22448 +eval (validation): [2] [80/85] eta: 0:00:01 time: 0.3348 data: 0.0041 max mem: 22448 +eval (validation): [2] [84/85] eta: 0:00:00 time: 0.3292 data: 0.0040 max mem: 22448 +eval (validation): [2] Total time: 0:00:31 (0.3761 s / it) +cv: [2] best hparam: (3.7, 1.0) (032) ('032_lr3.7e+00_wd1.0e+00') loss: 2.556 acc: 0.240 f1: 0.168 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [3] [ 0/400] eta: 0:21:55 lr: nan time: 3.2887 data: 2.9609 max mem: 22448 +WARNING: classifier 46 (36, 1.0) diverged (loss=65.80 > 63.56) at step 603. Freezing. +train: [3] [ 20/400] eta: 0:03:40 lr: 0.000183 loss: 2.8718 (3.1773) grad: 0.2107 (0.6778) time: 0.4444 data: 0.0044 max mem: 22448 +train: [3] [ 40/400] eta: 0:03:06 lr: 0.000186 loss: 2.9062 (3.0506) grad: 0.2161 (0.4519) time: 0.4541 data: 0.0038 max mem: 22448 +train: [3] [ 60/400] eta: 0:02:48 lr: 0.000189 loss: 2.8881 (2.9936) grad: 0.2224 (0.3733) time: 0.4513 data: 0.0042 max mem: 22448 +train: [3] [ 80/400] eta: 0:02:34 lr: 0.000192 loss: 2.8632 (2.9644) grad: 0.2123 (0.3329) time: 0.4434 data: 0.0041 max mem: 22448 +train: [3] [100/400] eta: 0:02:23 lr: 0.000195 loss: 2.8581 (2.9440) grad: 0.2089 (0.3093) time: 0.4512 data: 0.0042 max mem: 22448 +train: [3] [120/400] eta: 0:02:12 lr: 0.000198 loss: 2.8576 (2.9289) grad: 0.2185 (0.2953) time: 0.4495 data: 0.0042 max mem: 22448 +train: [3] [140/400] eta: 0:02:01 lr: 0.000201 loss: 2.8576 (2.9226) grad: 0.2346 (0.2882) time: 0.4452 data: 0.0044 max mem: 22448 +train: [3] [160/400] eta: 0:01:52 lr: 0.000204 loss: 2.8888 (2.9191) grad: 0.2453 (0.2827) time: 0.4764 data: 0.0046 max mem: 22448 +train: [3] [180/400] eta: 0:01:42 lr: 0.000207 loss: 2.8888 (2.9120) grad: 0.2438 (0.2783) time: 0.4424 data: 0.0043 max mem: 22448 +train: [3] [200/400] eta: 0:01:32 lr: 0.000210 loss: 2.8705 (2.9121) grad: 0.2457 (0.2768) time: 0.4430 data: 0.0043 max mem: 22448 +train: [3] [220/400] eta: 0:01:23 lr: 0.000213 loss: 2.9120 (2.9156) grad: 0.2759 (0.2920) time: 0.4623 data: 0.0042 max mem: 22448 +WARNING: classifier 45 (31, 1.0) diverged (loss=66.38 > 63.56) at step 719. Freezing. +train: [3] [240/400] eta: 0:01:13 lr: 0.000216 loss: 3.0322 (2.9624) grad: 0.7472 (0.3847) time: 0.4437 data: 0.0042 max mem: 22448 +train: [3] [260/400] eta: 0:01:04 lr: 0.000219 loss: 2.9531 (2.9553) grad: 0.2600 (0.3729) time: 0.4401 data: 0.0042 max mem: 22448 +train: [3] [280/400] eta: 0:00:55 lr: 0.000222 loss: 2.8608 (2.9472) grad: 0.2272 (0.3627) time: 0.4570 data: 0.0043 max mem: 22448 +train: [3] [300/400] eta: 0:00:45 lr: 0.000225 loss: 2.8784 (2.9435) grad: 0.2291 (0.3548) time: 0.4523 data: 0.0042 max mem: 22448 +train: [3] [320/400] eta: 0:00:36 lr: 0.000228 loss: 2.8912 (2.9405) grad: 0.2731 (0.3537) time: 0.4536 data: 0.0041 max mem: 22448 +train: [3] [340/400] eta: 0:00:27 lr: 0.000231 loss: 2.9424 (2.9578) grad: 0.4246 (0.3854) time: 0.4559 data: 0.0041 max mem: 22448 +WARNING: classifier 44 (26, 1.0) diverged (loss=69.34 > 63.56) at step 778. Freezing. +train: [3] [360/400] eta: 0:00:18 lr: 0.000234 loss: 3.3448 (2.9989) grad: 1.1328 (0.4434) time: 0.4427 data: 0.0041 max mem: 22448 +train: [3] [380/400] eta: 0:00:09 lr: 0.000237 loss: 2.9113 (2.9906) grad: 0.2289 (0.4316) time: 0.4449 data: 0.0041 max mem: 22448 +train: [3] [399/400] eta: 0:00:00 lr: 0.000240 loss: 2.8200 (2.9825) grad: 0.2158 (0.4208) time: 0.4399 data: 0.0044 max mem: 22448 +train: [3] Total time: 0:03:02 (0.4570 s / it) +train: [3] Summary: lr: 0.000240 loss: 2.8200 (2.9825) grad: 0.2158 (0.4208) +eval (validation): [3] [ 0/85] eta: 0:04:42 time: 3.3229 data: 3.0421 max mem: 22448 +eval (validation): [3] [20/85] eta: 0:00:31 time: 0.3409 data: 0.0032 max mem: 22448 +eval (validation): [3] [40/85] eta: 0:00:18 time: 0.3391 data: 0.0041 max mem: 22448 +eval (validation): [3] [60/85] eta: 0:00:09 time: 0.3310 data: 0.0043 max mem: 22448 +eval (validation): [3] [80/85] eta: 0:00:01 time: 0.3211 data: 0.0039 max mem: 22448 +eval (validation): [3] [84/85] eta: 0:00:00 time: 0.3130 data: 0.0038 max mem: 22448 +eval (validation): [3] Total time: 0:00:31 (0.3706 s / it) +cv: [3] best hparam: (2.7, 1.0) (030) ('030_lr2.7e+00_wd1.0e+00') loss: 2.580 acc: 0.230 f1: 0.157 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [4] [ 0/400] eta: 0:22:32 lr: nan time: 3.3825 data: 3.0539 max mem: 22448 +train: [4] [ 20/400] eta: 0:03:34 lr: 0.000243 loss: 2.8024 (2.8050) grad: 0.2216 (0.2214) time: 0.4223 data: 0.0036 max mem: 22448 +train: [4] [ 40/400] eta: 0:03:03 lr: 0.000246 loss: 2.8228 (2.8143) grad: 0.2226 (0.2213) time: 0.4549 data: 0.0040 max mem: 22448 +train: [4] [ 60/400] eta: 0:02:45 lr: 0.000249 loss: 2.8300 (2.8193) grad: 0.2160 (0.2208) time: 0.4412 data: 0.0043 max mem: 22448 +train: [4] [ 80/400] eta: 0:02:32 lr: 0.000252 loss: 2.8335 (2.8219) grad: 0.2139 (0.2189) time: 0.4451 data: 0.0043 max mem: 22448 +train: [4] [100/400] eta: 0:02:21 lr: 0.000255 loss: 2.8445 (2.8298) grad: 0.2183 (0.2213) time: 0.4414 data: 0.0043 max mem: 22448 +train: [4] [120/400] eta: 0:02:10 lr: 0.000258 loss: 2.8445 (2.8274) grad: 0.2288 (0.2224) time: 0.4426 data: 0.0042 max mem: 22448 +train: [4] [140/400] eta: 0:02:00 lr: 0.000261 loss: 2.8262 (2.8279) grad: 0.2326 (0.2250) time: 0.4406 data: 0.0042 max mem: 22448 +train: [4] [160/400] eta: 0:01:50 lr: 0.000264 loss: 2.8262 (2.8296) grad: 0.2362 (0.2264) time: 0.4473 data: 0.0043 max mem: 22448 +train: [4] [180/400] eta: 0:01:41 lr: 0.000267 loss: 2.8246 (2.8331) grad: 0.2414 (0.2286) time: 0.4602 data: 0.0043 max mem: 22448 +train: [4] [200/400] eta: 0:01:31 lr: 0.000270 loss: 2.8224 (2.8292) grad: 0.2448 (0.2300) time: 0.4473 data: 0.0042 max mem: 22448 +train: [4] [220/400] eta: 0:01:22 lr: 0.000273 loss: 2.8378 (2.8319) grad: 0.2479 (0.2322) time: 0.4540 data: 0.0044 max mem: 22448 +train: [4] [240/400] eta: 0:01:13 lr: 0.000276 loss: 2.8443 (2.8320) grad: 0.2557 (0.2353) time: 0.4522 data: 0.0043 max mem: 22448 +train: [4] [260/400] eta: 0:01:03 lr: 0.000279 loss: 2.8560 (2.8364) grad: 0.2832 (0.2420) time: 0.4466 data: 0.0042 max mem: 22448 +train: [4] [280/400] eta: 0:00:54 lr: 0.000282 loss: 2.9925 (2.8581) grad: 0.4488 (0.2887) time: 0.4428 data: 0.0042 max mem: 22448 +WARNING: classifier 43 (22, 1.0) diverged (loss=77.93 > 63.56) at step 944. Freezing. +train: [4] [300/400] eta: 0:00:45 lr: 0.000285 loss: 3.0891 (2.8863) grad: 0.7468 (0.3320) time: 0.4575 data: 0.0043 max mem: 22448 +train: [4] [320/400] eta: 0:00:36 lr: 0.000288 loss: 3.1947 (2.9080) grad: 0.9362 (0.3753) time: 0.4604 data: 0.0042 max mem: 22448 +WARNING: classifier 42 (19, 1.0) diverged (loss=75.21 > 63.56) at step 968. Freezing. +train: [4] [340/400] eta: 0:00:27 lr: 0.000291 loss: 3.3012 (2.9449) grad: 1.1647 (0.4348) time: 0.4420 data: 0.0041 max mem: 22448 +train: [4] [360/400] eta: 0:00:18 lr: 0.000294 loss: 2.9188 (2.9402) grad: 0.2197 (0.4224) time: 0.4516 data: 0.0041 max mem: 22448 +train: [4] [380/400] eta: 0:00:09 lr: 0.000297 loss: 2.8279 (2.9342) grad: 0.2094 (0.4113) time: 0.4516 data: 0.0042 max mem: 22448 +train: [4] [399/400] eta: 0:00:00 lr: 0.000300 loss: 2.8118 (2.9279) grad: 0.2102 (0.4015) time: 0.4421 data: 0.0043 max mem: 22448 +train: [4] Total time: 0:03:01 (0.4548 s / it) +train: [4] Summary: lr: 0.000300 loss: 2.8118 (2.9279) grad: 0.2102 (0.4015) +eval (validation): [4] [ 0/85] eta: 0:04:38 time: 3.2743 data: 3.0249 max mem: 22448 +eval (validation): [4] [20/85] eta: 0:00:30 time: 0.3324 data: 0.0043 max mem: 22448 +eval (validation): [4] [40/85] eta: 0:00:18 time: 0.3362 data: 0.0038 max mem: 22448 +eval (validation): [4] [60/85] eta: 0:00:09 time: 0.3450 data: 0.0044 max mem: 22448 +eval (validation): [4] [80/85] eta: 0:00:01 time: 0.3395 data: 0.0041 max mem: 22448 +eval (validation): [4] [84/85] eta: 0:00:00 time: 0.3273 data: 0.0040 max mem: 22448 +eval (validation): [4] Total time: 0:00:31 (0.3754 s / it) +cv: [4] best hparam: (1.6, 1.0) (027) ('027_lr1.6e+00_wd1.0e+00') loss: 2.531 acc: 0.242 f1: 0.179 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [5] [ 0/400] eta: 0:22:51 lr: nan time: 3.4298 data: 3.1002 max mem: 22448 +train: [5] [ 20/400] eta: 0:03:35 lr: 0.000300 loss: 2.7570 (2.7577) grad: 0.2189 (0.2203) time: 0.4229 data: 0.0027 max mem: 22448 +train: [5] [ 40/400] eta: 0:03:04 lr: 0.000300 loss: 2.7822 (2.7994) grad: 0.2246 (0.2237) time: 0.4544 data: 0.0043 max mem: 22448 +train: [5] [ 60/400] eta: 0:02:45 lr: 0.000300 loss: 2.8088 (2.8034) grad: 0.2263 (0.2266) time: 0.4339 data: 0.0045 max mem: 22448 +train: [5] [ 80/400] eta: 0:02:31 lr: 0.000300 loss: 2.8056 (2.7963) grad: 0.2256 (0.2259) time: 0.4392 data: 0.0044 max mem: 22448 +train: [5] [100/400] eta: 0:02:20 lr: 0.000300 loss: 2.8080 (2.7994) grad: 0.2290 (0.2289) time: 0.4380 data: 0.0044 max mem: 22448 +train: [5] [120/400] eta: 0:02:09 lr: 0.000300 loss: 2.7802 (2.7934) grad: 0.2310 (0.2285) time: 0.4404 data: 0.0041 max mem: 22448 +train: [5] [140/400] eta: 0:01:59 lr: 0.000300 loss: 2.7469 (2.7862) grad: 0.2207 (0.2274) time: 0.4306 data: 0.0042 max mem: 22448 +train: [5] [160/400] eta: 0:01:49 lr: 0.000299 loss: 2.7415 (2.7842) grad: 0.2176 (0.2274) time: 0.4456 data: 0.0040 max mem: 22448 +train: [5] [180/400] eta: 0:01:40 lr: 0.000299 loss: 2.7647 (2.7860) grad: 0.2180 (0.2268) time: 0.4404 data: 0.0043 max mem: 22448 +train: [5] [200/400] eta: 0:01:30 lr: 0.000299 loss: 2.7757 (2.7851) grad: 0.2228 (0.2271) time: 0.4341 data: 0.0041 max mem: 22448 +train: [5] [220/400] eta: 0:01:21 lr: 0.000299 loss: 2.7648 (2.7837) grad: 0.2248 (0.2267) time: 0.4442 data: 0.0042 max mem: 22448 +train: [5] [240/400] eta: 0:01:12 lr: 0.000299 loss: 2.7697 (2.7831) grad: 0.2248 (0.2270) time: 0.4430 data: 0.0042 max mem: 22448 +train: [5] [260/400] eta: 0:01:03 lr: 0.000299 loss: 2.7770 (2.7806) grad: 0.2210 (0.2263) time: 0.4421 data: 0.0043 max mem: 22448 +train: [5] [280/400] eta: 0:00:54 lr: 0.000298 loss: 2.7782 (2.7805) grad: 0.2216 (0.2266) time: 0.4488 data: 0.0041 max mem: 22448 +train: [5] [300/400] eta: 0:00:45 lr: 0.000298 loss: 2.7574 (2.7777) grad: 0.2250 (0.2265) time: 0.4602 data: 0.0044 max mem: 22448 +train: [5] [320/400] eta: 0:00:36 lr: 0.000298 loss: 2.7477 (2.7786) grad: 0.2287 (0.2270) time: 0.4462 data: 0.0043 max mem: 22448 +train: [5] [340/400] eta: 0:00:27 lr: 0.000298 loss: 2.7481 (2.7765) grad: 0.2318 (0.2272) time: 0.4405 data: 0.0042 max mem: 22448 +train: [5] [360/400] eta: 0:00:18 lr: 0.000297 loss: 2.7475 (2.7760) grad: 0.2309 (0.2276) time: 0.4530 data: 0.0042 max mem: 22448 +train: [5] [380/400] eta: 0:00:09 lr: 0.000297 loss: 2.7481 (2.7753) grad: 0.2320 (0.2279) time: 0.4473 data: 0.0045 max mem: 22448 +train: [5] [399/400] eta: 0:00:00 lr: 0.000297 loss: 2.7041 (2.7725) grad: 0.2205 (0.2272) time: 0.4393 data: 0.0042 max mem: 22448 +train: [5] Total time: 0:02:59 (0.4500 s / it) +train: [5] Summary: lr: 0.000297 loss: 2.7041 (2.7725) grad: 0.2205 (0.2272) +eval (validation): [5] [ 0/85] eta: 0:04:43 time: 3.3358 data: 3.0564 max mem: 22448 +eval (validation): [5] [20/85] eta: 0:00:32 time: 0.3591 data: 0.0050 max mem: 22448 +eval (validation): [5] [40/85] eta: 0:00:18 time: 0.3302 data: 0.0037 max mem: 22448 +eval (validation): [5] [60/85] eta: 0:00:09 time: 0.3422 data: 0.0045 max mem: 22448 +eval (validation): [5] [80/85] eta: 0:00:01 time: 0.3273 data: 0.0042 max mem: 22448 +eval (validation): [5] [84/85] eta: 0:00:00 time: 0.3176 data: 0.0040 max mem: 22448 +eval (validation): [5] Total time: 0:00:31 (0.3764 s / it) +cv: [5] best hparam: (1.2, 1.0) (025) ('025_lr1.2e+00_wd1.0e+00') loss: 2.535 acc: 0.244 f1: 0.169 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [6] [ 0/400] eta: 0:22:32 lr: nan time: 3.3802 data: 3.0174 max mem: 22448 +train: [6] [ 20/400] eta: 0:03:37 lr: 0.000296 loss: 2.7077 (2.7023) grad: 0.2227 (0.2208) time: 0.4306 data: 0.0047 max mem: 22448 +train: [6] [ 40/400] eta: 0:03:04 lr: 0.000296 loss: 2.7202 (2.7172) grad: 0.2237 (0.2250) time: 0.4499 data: 0.0036 max mem: 22448 +train: [6] [ 60/400] eta: 0:02:46 lr: 0.000296 loss: 2.7099 (2.7125) grad: 0.2294 (0.2269) time: 0.4432 data: 0.0043 max mem: 22448 +train: [6] [ 80/400] eta: 0:02:32 lr: 0.000295 loss: 2.7037 (2.7063) grad: 0.2312 (0.2298) time: 0.4398 data: 0.0042 max mem: 22448 +train: [6] [100/400] eta: 0:02:20 lr: 0.000295 loss: 2.7200 (2.7108) grad: 0.2371 (0.2311) time: 0.4347 data: 0.0044 max mem: 22448 +train: [6] [120/400] eta: 0:02:09 lr: 0.000295 loss: 2.7251 (2.7124) grad: 0.2425 (0.2326) time: 0.4411 data: 0.0042 max mem: 22448 +train: [6] [140/400] eta: 0:01:59 lr: 0.000294 loss: 2.7362 (2.7197) grad: 0.2425 (0.2342) time: 0.4384 data: 0.0042 max mem: 22448 +train: [6] [160/400] eta: 0:01:50 lr: 0.000294 loss: 2.7377 (2.7213) grad: 0.2354 (0.2338) time: 0.4441 data: 0.0041 max mem: 22448 +train: [6] [180/400] eta: 0:01:40 lr: 0.000293 loss: 2.7031 (2.7189) grad: 0.2339 (0.2347) time: 0.4552 data: 0.0041 max mem: 22448 +train: [6] [200/400] eta: 0:01:31 lr: 0.000293 loss: 2.7157 (2.7196) grad: 0.2444 (0.2352) time: 0.4365 data: 0.0043 max mem: 22448 +train: [6] [220/400] eta: 0:01:21 lr: 0.000292 loss: 2.7421 (2.7168) grad: 0.2412 (0.2359) time: 0.4485 data: 0.0043 max mem: 22448 +train: [6] [240/400] eta: 0:01:12 lr: 0.000292 loss: 2.7423 (2.7188) grad: 0.2365 (0.2358) time: 0.4327 data: 0.0042 max mem: 22448 +train: [6] [260/400] eta: 0:01:03 lr: 0.000291 loss: 2.7364 (2.7157) grad: 0.2350 (0.2356) time: 0.4392 data: 0.0041 max mem: 22448 +train: [6] [280/400] eta: 0:00:54 lr: 0.000291 loss: 2.7224 (2.7168) grad: 0.2241 (0.2353) time: 0.4272 data: 0.0043 max mem: 22448 +train: [6] [300/400] eta: 0:00:45 lr: 0.000290 loss: 2.7373 (2.7179) grad: 0.2279 (0.2352) time: 0.4636 data: 0.0045 max mem: 22448 +train: [6] [320/400] eta: 0:00:36 lr: 0.000290 loss: 2.7333 (2.7195) grad: 0.2387 (0.2355) time: 0.4540 data: 0.0043 max mem: 22448 +train: [6] [340/400] eta: 0:00:27 lr: 0.000289 loss: 2.7255 (2.7195) grad: 0.2350 (0.2354) time: 0.4377 data: 0.0041 max mem: 22448 +train: [6] [360/400] eta: 0:00:18 lr: 0.000288 loss: 2.7034 (2.7180) grad: 0.2325 (0.2351) time: 0.4416 data: 0.0042 max mem: 22448 +train: [6] [380/400] eta: 0:00:08 lr: 0.000288 loss: 2.6940 (2.7191) grad: 0.2325 (0.2351) time: 0.4446 data: 0.0043 max mem: 22448 +train: [6] [399/400] eta: 0:00:00 lr: 0.000287 loss: 2.7143 (2.7184) grad: 0.2264 (0.2345) time: 0.4388 data: 0.0042 max mem: 22448 +train: [6] Total time: 0:02:59 (0.4497 s / it) +train: [6] Summary: lr: 0.000287 loss: 2.7143 (2.7184) grad: 0.2264 (0.2345) +eval (validation): [6] [ 0/85] eta: 0:04:37 time: 3.2682 data: 3.0412 max mem: 22448 +eval (validation): [6] [20/85] eta: 0:00:31 time: 0.3482 data: 0.0038 max mem: 22448 +eval (validation): [6] [40/85] eta: 0:00:18 time: 0.3344 data: 0.0040 max mem: 22448 +eval (validation): [6] [60/85] eta: 0:00:09 time: 0.3284 data: 0.0043 max mem: 22448 +eval (validation): [6] [80/85] eta: 0:00:01 time: 0.3309 data: 0.0042 max mem: 22448 +eval (validation): [6] [84/85] eta: 0:00:00 time: 0.3247 data: 0.0041 max mem: 22448 +eval (validation): [6] Total time: 0:00:31 (0.3721 s / it) +cv: [6] best hparam: (0.72, 1.0) (022) ('022_lr7.2e-01_wd1.0e+00') loss: 2.521 acc: 0.245 f1: 0.179 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [7] [ 0/400] eta: 0:22:16 lr: nan time: 3.3416 data: 3.0142 max mem: 22448 +train: [7] [ 20/400] eta: 0:03:37 lr: 0.000286 loss: 2.6445 (2.6481) grad: 0.2274 (0.2337) time: 0.4339 data: 0.0043 max mem: 22448 +train: [7] [ 40/400] eta: 0:03:03 lr: 0.000286 loss: 2.6464 (2.6529) grad: 0.2344 (0.2378) time: 0.4426 data: 0.0039 max mem: 22448 +train: [7] [ 60/400] eta: 0:02:44 lr: 0.000285 loss: 2.6596 (2.6514) grad: 0.2408 (0.2412) time: 0.4359 data: 0.0044 max mem: 22448 +train: [7] [ 80/400] eta: 0:02:31 lr: 0.000284 loss: 2.6596 (2.6570) grad: 0.2363 (0.2378) time: 0.4378 data: 0.0044 max mem: 22448 +train: [7] [100/400] eta: 0:02:19 lr: 0.000284 loss: 2.6220 (2.6515) grad: 0.2243 (0.2366) time: 0.4302 data: 0.0043 max mem: 22448 +train: [7] [120/400] eta: 0:02:08 lr: 0.000283 loss: 2.6381 (2.6510) grad: 0.2328 (0.2370) time: 0.4328 data: 0.0042 max mem: 22448 +train: [7] [140/400] eta: 0:01:58 lr: 0.000282 loss: 2.6491 (2.6540) grad: 0.2323 (0.2366) time: 0.4278 data: 0.0042 max mem: 22448 +train: [7] [160/400] eta: 0:01:48 lr: 0.000282 loss: 2.6550 (2.6528) grad: 0.2303 (0.2365) time: 0.4472 data: 0.0041 max mem: 22448 +train: [7] [180/400] eta: 0:01:39 lr: 0.000281 loss: 2.6810 (2.6577) grad: 0.2399 (0.2376) time: 0.4512 data: 0.0042 max mem: 22448 +train: [7] [200/400] eta: 0:01:30 lr: 0.000280 loss: 2.6681 (2.6570) grad: 0.2347 (0.2370) time: 0.4298 data: 0.0043 max mem: 22448 +train: [7] [220/400] eta: 0:01:21 lr: 0.000279 loss: 2.6274 (2.6526) grad: 0.2347 (0.2373) time: 0.4445 data: 0.0042 max mem: 22448 +train: [7] [240/400] eta: 0:01:12 lr: 0.000278 loss: 2.6316 (2.6533) grad: 0.2382 (0.2374) time: 0.4476 data: 0.0042 max mem: 22448 +train: [7] [260/400] eta: 0:01:02 lr: 0.000278 loss: 2.6599 (2.6532) grad: 0.2371 (0.2371) time: 0.4335 data: 0.0042 max mem: 22448 +train: [7] [280/400] eta: 0:00:53 lr: 0.000277 loss: 2.6094 (2.6494) grad: 0.2368 (0.2371) time: 0.4346 data: 0.0042 max mem: 22448 +train: [7] [300/400] eta: 0:00:44 lr: 0.000276 loss: 2.5884 (2.6492) grad: 0.2383 (0.2373) time: 0.4699 data: 0.0046 max mem: 22448 +train: [7] [320/400] eta: 0:00:35 lr: 0.000275 loss: 2.6572 (2.6489) grad: 0.2383 (0.2372) time: 0.4452 data: 0.0044 max mem: 22448 +train: [7] [340/400] eta: 0:00:26 lr: 0.000274 loss: 2.6376 (2.6468) grad: 0.2285 (0.2367) time: 0.4322 data: 0.0042 max mem: 22448 +train: [7] [360/400] eta: 0:00:17 lr: 0.000273 loss: 2.6376 (2.6480) grad: 0.2307 (0.2368) time: 0.4344 data: 0.0041 max mem: 22448 +train: [7] [380/400] eta: 0:00:08 lr: 0.000272 loss: 2.6681 (2.6484) grad: 0.2351 (0.2371) time: 0.4547 data: 0.0042 max mem: 22448 +train: [7] [399/400] eta: 0:00:00 lr: 0.000271 loss: 2.6724 (2.6495) grad: 0.2432 (0.2378) time: 0.4365 data: 0.0041 max mem: 22448 +train: [7] Total time: 0:02:59 (0.4477 s / it) +train: [7] Summary: lr: 0.000271 loss: 2.6724 (2.6495) grad: 0.2432 (0.2378) +eval (validation): [7] [ 0/85] eta: 0:04:41 time: 3.3111 data: 3.0697 max mem: 22448 +eval (validation): [7] [20/85] eta: 0:00:31 time: 0.3401 data: 0.0040 max mem: 22448 +eval (validation): [7] [40/85] eta: 0:00:18 time: 0.3396 data: 0.0036 max mem: 22448 +eval (validation): [7] [60/85] eta: 0:00:09 time: 0.3343 data: 0.0043 max mem: 22448 +eval (validation): [7] [80/85] eta: 0:00:01 time: 0.3334 data: 0.0044 max mem: 22448 +eval (validation): [7] [84/85] eta: 0:00:00 time: 0.3218 data: 0.0041 max mem: 22448 +eval (validation): [7] Total time: 0:00:31 (0.3739 s / it) +cv: [7] best hparam: (0.72, 1.0) (022) ('022_lr7.2e-01_wd1.0e+00') loss: 2.541 acc: 0.242 f1: 0.176 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [8] [ 0/400] eta: 0:22:29 lr: nan time: 3.3729 data: 3.0507 max mem: 22448 +train: [8] [ 20/400] eta: 0:03:34 lr: 0.000270 loss: 2.5072 (2.5522) grad: 0.2250 (0.2327) time: 0.4246 data: 0.0033 max mem: 22448 +train: [8] [ 40/400] eta: 0:03:00 lr: 0.000270 loss: 2.5637 (2.5710) grad: 0.2330 (0.2354) time: 0.4335 data: 0.0037 max mem: 22448 +train: [8] [ 60/400] eta: 0:02:43 lr: 0.000269 loss: 2.6047 (2.5833) grad: 0.2388 (0.2365) time: 0.4427 data: 0.0042 max mem: 22448 +train: [8] [ 80/400] eta: 0:02:30 lr: 0.000268 loss: 2.5883 (2.5887) grad: 0.2369 (0.2385) time: 0.4342 data: 0.0043 max mem: 22448 +train: [8] [100/400] eta: 0:02:18 lr: 0.000267 loss: 2.5780 (2.5909) grad: 0.2511 (0.2425) time: 0.4335 data: 0.0043 max mem: 22448 +train: [8] [120/400] eta: 0:02:08 lr: 0.000266 loss: 2.5571 (2.5870) grad: 0.2606 (0.2454) time: 0.4339 data: 0.0043 max mem: 22448 +train: [8] [140/400] eta: 0:01:58 lr: 0.000265 loss: 2.5686 (2.5908) grad: 0.2505 (0.2463) time: 0.4358 data: 0.0045 max mem: 22448 +train: [8] [160/400] eta: 0:01:48 lr: 0.000264 loss: 2.5965 (2.5923) grad: 0.2500 (0.2484) time: 0.4299 data: 0.0042 max mem: 22448 +train: [8] [180/400] eta: 0:01:39 lr: 0.000263 loss: 2.5734 (2.5891) grad: 0.2502 (0.2484) time: 0.4411 data: 0.0042 max mem: 22448 +train: [8] [200/400] eta: 0:01:30 lr: 0.000262 loss: 2.5734 (2.5907) grad: 0.2492 (0.2484) time: 0.4548 data: 0.0043 max mem: 22448 +train: [8] [220/400] eta: 0:01:20 lr: 0.000260 loss: 2.6069 (2.5921) grad: 0.2471 (0.2483) time: 0.4371 data: 0.0042 max mem: 22448 +train: [8] [240/400] eta: 0:01:11 lr: 0.000259 loss: 2.5905 (2.5906) grad: 0.2471 (0.2485) time: 0.4459 data: 0.0043 max mem: 22448 +train: [8] [260/400] eta: 0:01:02 lr: 0.000258 loss: 2.5982 (2.5927) grad: 0.2448 (0.2486) time: 0.4463 data: 0.0041 max mem: 22448 +train: [8] [280/400] eta: 0:00:53 lr: 0.000257 loss: 2.5917 (2.5910) grad: 0.2431 (0.2488) time: 0.4384 data: 0.0042 max mem: 22448 +train: [8] [300/400] eta: 0:00:44 lr: 0.000256 loss: 2.5502 (2.5921) grad: 0.2475 (0.2487) time: 0.4507 data: 0.0042 max mem: 22448 +train: [8] [320/400] eta: 0:00:35 lr: 0.000255 loss: 2.5879 (2.5921) grad: 0.2364 (0.2478) time: 0.4703 data: 0.0044 max mem: 22448 +train: [8] [340/400] eta: 0:00:26 lr: 0.000254 loss: 2.5879 (2.5913) grad: 0.2390 (0.2480) time: 0.4496 data: 0.0043 max mem: 22448 +train: [8] [360/400] eta: 0:00:17 lr: 0.000253 loss: 2.6100 (2.5922) grad: 0.2477 (0.2477) time: 0.4340 data: 0.0041 max mem: 22448 +train: [8] [380/400] eta: 0:00:08 lr: 0.000252 loss: 2.6041 (2.5933) grad: 0.2435 (0.2474) time: 0.4620 data: 0.0041 max mem: 22448 +train: [8] [399/400] eta: 0:00:00 lr: 0.000250 loss: 2.6024 (2.5942) grad: 0.2467 (0.2483) time: 0.4405 data: 0.0042 max mem: 22448 +train: [8] Total time: 0:02:59 (0.4495 s / it) +train: [8] Summary: lr: 0.000250 loss: 2.6024 (2.5942) grad: 0.2467 (0.2483) +eval (validation): [8] [ 0/85] eta: 0:04:46 time: 3.3745 data: 3.0984 max mem: 22448 +eval (validation): [8] [20/85] eta: 0:00:31 time: 0.3333 data: 0.0040 max mem: 22448 +eval (validation): [8] [40/85] eta: 0:00:18 time: 0.3423 data: 0.0036 max mem: 22448 +eval (validation): [8] [60/85] eta: 0:00:09 time: 0.3242 data: 0.0044 max mem: 22448 +eval (validation): [8] [80/85] eta: 0:00:01 time: 0.3272 data: 0.0041 max mem: 22448 +eval (validation): [8] [84/85] eta: 0:00:00 time: 0.3205 data: 0.0039 max mem: 22448 +eval (validation): [8] Total time: 0:00:31 (0.3707 s / it) +cv: [8] best hparam: (0.61, 1.0) (021) ('021_lr6.1e-01_wd1.0e+00') loss: 2.568 acc: 0.243 f1: 0.178 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [9] [ 0/400] eta: 0:22:58 lr: nan time: 3.4452 data: 3.0709 max mem: 22448 +train: [9] [ 20/400] eta: 0:03:42 lr: 0.000249 loss: 2.5495 (2.5383) grad: 0.2504 (0.2550) time: 0.4438 data: 0.0039 max mem: 22448 +train: [9] [ 40/400] eta: 0:03:02 lr: 0.000248 loss: 2.5535 (2.5566) grad: 0.2444 (0.2476) time: 0.4245 data: 0.0040 max mem: 22448 +train: [9] [ 60/400] eta: 0:02:45 lr: 0.000247 loss: 2.5487 (2.5487) grad: 0.2367 (0.2431) time: 0.4426 data: 0.0044 max mem: 22448 +train: [9] [ 80/400] eta: 0:02:31 lr: 0.000246 loss: 2.5368 (2.5559) grad: 0.2432 (0.2453) time: 0.4405 data: 0.0044 max mem: 22448 +train: [9] [100/400] eta: 0:02:20 lr: 0.000244 loss: 2.5368 (2.5554) grad: 0.2499 (0.2474) time: 0.4379 data: 0.0043 max mem: 22448 +train: [9] [120/400] eta: 0:02:09 lr: 0.000243 loss: 2.5209 (2.5537) grad: 0.2500 (0.2477) time: 0.4453 data: 0.0042 max mem: 22448 +train: [9] [140/400] eta: 0:02:00 lr: 0.000242 loss: 2.5309 (2.5545) grad: 0.2481 (0.2487) time: 0.4485 data: 0.0044 max mem: 22448 +train: [9] [160/400] eta: 0:01:50 lr: 0.000241 loss: 2.5538 (2.5505) grad: 0.2516 (0.2490) time: 0.4395 data: 0.0044 max mem: 22448 +train: [9] [180/400] eta: 0:01:40 lr: 0.000240 loss: 2.5327 (2.5541) grad: 0.2516 (0.2499) time: 0.4352 data: 0.0042 max mem: 22448 +train: [9] [200/400] eta: 0:01:30 lr: 0.000238 loss: 2.5327 (2.5520) grad: 0.2521 (0.2505) time: 0.4360 data: 0.0042 max mem: 22448 +train: [9] [220/400] eta: 0:01:21 lr: 0.000237 loss: 2.5341 (2.5488) grad: 0.2536 (0.2510) time: 0.4491 data: 0.0043 max mem: 22448 +train: [9] [240/400] eta: 0:01:12 lr: 0.000236 loss: 2.5571 (2.5513) grad: 0.2536 (0.2511) time: 0.4433 data: 0.0042 max mem: 22448 +train: [9] [260/400] eta: 0:01:03 lr: 0.000234 loss: 2.5586 (2.5507) grad: 0.2451 (0.2506) time: 0.4417 data: 0.0042 max mem: 22448 +train: [9] [280/400] eta: 0:00:54 lr: 0.000233 loss: 2.5435 (2.5513) grad: 0.2528 (0.2511) time: 0.4457 data: 0.0044 max mem: 22448 +train: [9] [300/400] eta: 0:00:45 lr: 0.000232 loss: 2.5561 (2.5523) grad: 0.2600 (0.2517) time: 0.4471 data: 0.0045 max mem: 22448 +train: [9] [320/400] eta: 0:00:36 lr: 0.000230 loss: 2.5490 (2.5530) grad: 0.2479 (0.2512) time: 0.4562 data: 0.0043 max mem: 22448 +train: [9] [340/400] eta: 0:00:27 lr: 0.000229 loss: 2.5295 (2.5507) grad: 0.2474 (0.2512) time: 0.4609 data: 0.0043 max mem: 22448 +train: [9] [360/400] eta: 0:00:18 lr: 0.000228 loss: 2.5412 (2.5520) grad: 0.2554 (0.2518) time: 0.4340 data: 0.0039 max mem: 22448 +train: [9] [380/400] eta: 0:00:09 lr: 0.000226 loss: 2.5333 (2.5515) grad: 0.2519 (0.2516) time: 0.4588 data: 0.0042 max mem: 22448 +train: [9] [399/400] eta: 0:00:00 lr: 0.000225 loss: 2.5330 (2.5528) grad: 0.2563 (0.2523) time: 0.4732 data: 0.0043 max mem: 22448 +train: [9] Total time: 0:03:01 (0.4527 s / it) +train: [9] Summary: lr: 0.000225 loss: 2.5330 (2.5528) grad: 0.2563 (0.2523) +eval (validation): [9] [ 0/85] eta: 0:04:28 time: 3.1595 data: 2.9309 max mem: 22448 +eval (validation): [9] [20/85] eta: 0:00:30 time: 0.3318 data: 0.0047 max mem: 22448 +eval (validation): [9] [40/85] eta: 0:00:18 time: 0.3564 data: 0.0033 max mem: 22448 +eval (validation): [9] [60/85] eta: 0:00:10 time: 0.3840 data: 0.0043 max mem: 22448 +eval (validation): [9] [80/85] eta: 0:00:01 time: 0.3564 data: 0.0045 max mem: 22448 +eval (validation): [9] [84/85] eta: 0:00:00 time: 0.3321 data: 0.0042 max mem: 22448 +eval (validation): [9] Total time: 0:00:33 (0.3910 s / it) +cv: [9] best hparam: (0.44, 1.0) (019) ('019_lr4.4e-01_wd1.0e+00') loss: 2.518 acc: 0.249 f1: 0.173 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [10] [ 0/400] eta: 0:22:02 lr: nan time: 3.3051 data: 2.9504 max mem: 22448 +train: [10] [ 20/400] eta: 0:03:36 lr: 0.000224 loss: 2.5131 (2.5087) grad: 0.2548 (0.2627) time: 0.4337 data: 0.0045 max mem: 22448 +train: [10] [ 40/400] eta: 0:03:03 lr: 0.000222 loss: 2.4904 (2.4906) grad: 0.2523 (0.2565) time: 0.4443 data: 0.0038 max mem: 22448 +train: [10] [ 60/400] eta: 0:02:45 lr: 0.000221 loss: 2.5005 (2.5072) grad: 0.2486 (0.2541) time: 0.4410 data: 0.0042 max mem: 22448 +train: [10] [ 80/400] eta: 0:02:32 lr: 0.000220 loss: 2.5114 (2.4988) grad: 0.2463 (0.2518) time: 0.4411 data: 0.0041 max mem: 22448 +train: [10] [100/400] eta: 0:02:20 lr: 0.000218 loss: 2.5012 (2.4976) grad: 0.2442 (0.2508) time: 0.4398 data: 0.0041 max mem: 22448 +train: [10] [120/400] eta: 0:02:09 lr: 0.000217 loss: 2.5090 (2.4993) grad: 0.2509 (0.2516) time: 0.4427 data: 0.0041 max mem: 22448 +train: [10] [140/400] eta: 0:02:00 lr: 0.000215 loss: 2.4965 (2.4995) grad: 0.2509 (0.2510) time: 0.4486 data: 0.0044 max mem: 22448 +train: [10] [160/400] eta: 0:01:50 lr: 0.000214 loss: 2.4763 (2.4984) grad: 0.2448 (0.2510) time: 0.4395 data: 0.0043 max mem: 22448 +train: [10] [180/400] eta: 0:01:40 lr: 0.000213 loss: 2.5002 (2.5018) grad: 0.2458 (0.2513) time: 0.4525 data: 0.0043 max mem: 22448 +train: [10] [200/400] eta: 0:01:31 lr: 0.000211 loss: 2.5289 (2.5039) grad: 0.2535 (0.2517) time: 0.4266 data: 0.0043 max mem: 22448 +train: [10] [220/400] eta: 0:01:21 lr: 0.000210 loss: 2.5141 (2.5040) grad: 0.2531 (0.2518) time: 0.4484 data: 0.0043 max mem: 22448 +train: [10] [240/400] eta: 0:01:12 lr: 0.000208 loss: 2.4902 (2.5040) grad: 0.2474 (0.2514) time: 0.4440 data: 0.0043 max mem: 22448 +train: [10] [260/400] eta: 0:01:03 lr: 0.000207 loss: 2.4902 (2.5030) grad: 0.2458 (0.2513) time: 0.4351 data: 0.0042 max mem: 22448 +train: [10] [280/400] eta: 0:00:54 lr: 0.000205 loss: 2.4993 (2.5045) grad: 0.2501 (0.2514) time: 0.4415 data: 0.0043 max mem: 22448 +train: [10] [300/400] eta: 0:00:45 lr: 0.000204 loss: 2.4705 (2.5019) grad: 0.2512 (0.2514) time: 0.4513 data: 0.0043 max mem: 22448 +train: [10] [320/400] eta: 0:00:36 lr: 0.000202 loss: 2.4782 (2.5019) grad: 0.2559 (0.2522) time: 0.4350 data: 0.0041 max mem: 22448 +train: [10] [340/400] eta: 0:00:27 lr: 0.000201 loss: 2.5026 (2.5031) grad: 0.2593 (0.2522) time: 0.4473 data: 0.0043 max mem: 22448 +train: [10] [360/400] eta: 0:00:17 lr: 0.000199 loss: 2.4942 (2.5019) grad: 0.2432 (0.2518) time: 0.4322 data: 0.0043 max mem: 22448 +train: [10] [380/400] eta: 0:00:08 lr: 0.000198 loss: 2.4712 (2.5004) grad: 0.2453 (0.2517) time: 0.4436 data: 0.0042 max mem: 22448 +train: [10] [399/400] eta: 0:00:00 lr: 0.000196 loss: 2.4581 (2.5010) grad: 0.2548 (0.2520) time: 0.4598 data: 0.0043 max mem: 22448 +train: [10] Total time: 0:02:59 (0.4499 s / it) +train: [10] Summary: lr: 0.000196 loss: 2.4581 (2.5010) grad: 0.2548 (0.2520) +eval (validation): [10] [ 0/85] eta: 0:04:42 time: 3.3179 data: 3.0455 max mem: 22448 +eval (validation): [10] [20/85] eta: 0:00:32 time: 0.3579 data: 0.0042 max mem: 22448 +eval (validation): [10] [40/85] eta: 0:00:19 time: 0.3614 data: 0.0043 max mem: 22448 +eval (validation): [10] [60/85] eta: 0:00:10 time: 0.3559 data: 0.0045 max mem: 22448 +eval (validation): [10] [80/85] eta: 0:00:01 time: 0.3344 data: 0.0041 max mem: 22448 +eval (validation): [10] [84/85] eta: 0:00:00 time: 0.3292 data: 0.0041 max mem: 22448 +eval (validation): [10] Total time: 0:00:33 (0.3892 s / it) +cv: [10] best hparam: (0.44, 1.0) (019) ('019_lr4.4e-01_wd1.0e+00') loss: 2.515 acc: 0.248 f1: 0.176 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [11] [ 0/400] eta: 0:22:20 lr: nan time: 3.3509 data: 3.0262 max mem: 22448 +train: [11] [ 20/400] eta: 0:03:34 lr: 0.000195 loss: 2.4486 (2.4473) grad: 0.2432 (0.2480) time: 0.4244 data: 0.0035 max mem: 22448 +train: [11] [ 40/400] eta: 0:03:02 lr: 0.000193 loss: 2.4486 (2.4563) grad: 0.2492 (0.2500) time: 0.4480 data: 0.0042 max mem: 22448 +train: [11] [ 60/400] eta: 0:02:45 lr: 0.000192 loss: 2.4442 (2.4482) grad: 0.2519 (0.2546) time: 0.4441 data: 0.0042 max mem: 22448 +train: [11] [ 80/400] eta: 0:02:34 lr: 0.000190 loss: 2.4584 (2.4552) grad: 0.2558 (0.2559) time: 0.4728 data: 0.0045 max mem: 22448 +train: [11] [100/400] eta: 0:02:22 lr: 0.000189 loss: 2.4560 (2.4498) grad: 0.2527 (0.2545) time: 0.4470 data: 0.0043 max mem: 22448 +train: [11] [120/400] eta: 0:02:11 lr: 0.000187 loss: 2.3900 (2.4383) grad: 0.2487 (0.2537) time: 0.4335 data: 0.0044 max mem: 22448 +train: [11] [140/400] eta: 0:02:01 lr: 0.000186 loss: 2.4301 (2.4442) grad: 0.2492 (0.2541) time: 0.4506 data: 0.0044 max mem: 22448 +train: [11] [160/400] eta: 0:01:51 lr: 0.000184 loss: 2.4669 (2.4501) grad: 0.2552 (0.2543) time: 0.4392 data: 0.0043 max mem: 22448 +train: [11] [180/400] eta: 0:01:41 lr: 0.000183 loss: 2.4542 (2.4508) grad: 0.2658 (0.2563) time: 0.4403 data: 0.0044 max mem: 22448 +train: [11] [200/400] eta: 0:01:31 lr: 0.000181 loss: 2.4542 (2.4546) grad: 0.2661 (0.2565) time: 0.4314 data: 0.0042 max mem: 22448 +train: [11] [220/400] eta: 0:01:22 lr: 0.000180 loss: 2.4927 (2.4604) grad: 0.2526 (0.2561) time: 0.4528 data: 0.0044 max mem: 22448 +train: [11] [240/400] eta: 0:01:12 lr: 0.000178 loss: 2.5046 (2.4635) grad: 0.2490 (0.2560) time: 0.4406 data: 0.0042 max mem: 22448 +train: [11] [260/400] eta: 0:01:03 lr: 0.000177 loss: 2.4816 (2.4637) grad: 0.2545 (0.2564) time: 0.4383 data: 0.0042 max mem: 22448 +train: [11] [280/400] eta: 0:00:54 lr: 0.000175 loss: 2.4656 (2.4643) grad: 0.2589 (0.2565) time: 0.4310 data: 0.0040 max mem: 22448 +train: [11] [300/400] eta: 0:00:45 lr: 0.000174 loss: 2.4680 (2.4674) grad: 0.2527 (0.2568) time: 0.4424 data: 0.0041 max mem: 22448 +train: [11] [320/400] eta: 0:00:36 lr: 0.000172 loss: 2.4946 (2.4681) grad: 0.2597 (0.2576) time: 0.4526 data: 0.0043 max mem: 22448 +train: [11] [340/400] eta: 0:00:27 lr: 0.000170 loss: 2.4797 (2.4680) grad: 0.2603 (0.2584) time: 0.4525 data: 0.0043 max mem: 22448 +train: [11] [360/400] eta: 0:00:18 lr: 0.000169 loss: 2.4581 (2.4673) grad: 0.2640 (0.2590) time: 0.4337 data: 0.0041 max mem: 22448 +train: [11] [380/400] eta: 0:00:09 lr: 0.000167 loss: 2.4114 (2.4638) grad: 0.2588 (0.2586) time: 0.4396 data: 0.0041 max mem: 22448 +train: [11] [399/400] eta: 0:00:00 lr: 0.000166 loss: 2.4509 (2.4670) grad: 0.2483 (0.2581) time: 0.4495 data: 0.0043 max mem: 22448 +train: [11] Total time: 0:03:00 (0.4508 s / it) +train: [11] Summary: lr: 0.000166 loss: 2.4509 (2.4670) grad: 0.2483 (0.2581) +eval (validation): [11] [ 0/85] eta: 0:04:49 time: 3.4085 data: 3.1249 max mem: 22448 +eval (validation): [11] [20/85] eta: 0:00:33 time: 0.3698 data: 0.0045 max mem: 22448 +eval (validation): [11] [40/85] eta: 0:00:20 time: 0.3760 data: 0.0045 max mem: 22448 +eval (validation): [11] [60/85] eta: 0:00:10 time: 0.3637 data: 0.0044 max mem: 22448 +eval (validation): [11] [80/85] eta: 0:00:01 time: 0.3278 data: 0.0040 max mem: 22448 +eval (validation): [11] [84/85] eta: 0:00:00 time: 0.3240 data: 0.0037 max mem: 22448 +eval (validation): [11] Total time: 0:00:33 (0.3972 s / it) +cv: [11] best hparam: (0.38, 1.0) (018) ('018_lr3.8e-01_wd1.0e+00') loss: 2.519 acc: 0.245 f1: 0.173 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [12] [ 0/400] eta: 0:23:15 lr: nan time: 3.4886 data: 3.1168 max mem: 22448 +train: [12] [ 20/400] eta: 0:03:42 lr: 0.000164 loss: 2.3647 (2.3702) grad: 0.2376 (0.2406) time: 0.4414 data: 0.0034 max mem: 22448 +train: [12] [ 40/400] eta: 0:03:06 lr: 0.000163 loss: 2.3730 (2.3849) grad: 0.2440 (0.2471) time: 0.4454 data: 0.0043 max mem: 22448 +train: [12] [ 60/400] eta: 0:02:47 lr: 0.000161 loss: 2.4109 (2.3933) grad: 0.2488 (0.2477) time: 0.4430 data: 0.0041 max mem: 22448 +train: [12] [ 80/400] eta: 0:02:34 lr: 0.000160 loss: 2.3956 (2.3975) grad: 0.2471 (0.2462) time: 0.4550 data: 0.0042 max mem: 22448 +train: [12] [100/400] eta: 0:02:22 lr: 0.000158 loss: 2.3674 (2.4004) grad: 0.2443 (0.2464) time: 0.4475 data: 0.0042 max mem: 22448 +train: [12] [120/400] eta: 0:02:11 lr: 0.000156 loss: 2.4234 (2.4040) grad: 0.2417 (0.2453) time: 0.4338 data: 0.0042 max mem: 22448 +train: [12] [140/400] eta: 0:02:01 lr: 0.000155 loss: 2.4234 (2.4048) grad: 0.2468 (0.2484) time: 0.4512 data: 0.0043 max mem: 22448 +train: [12] [160/400] eta: 0:01:51 lr: 0.000153 loss: 2.4171 (2.4044) grad: 0.2623 (0.2503) time: 0.4410 data: 0.0042 max mem: 22448 +train: [12] [180/400] eta: 0:01:41 lr: 0.000152 loss: 2.3974 (2.4064) grad: 0.2586 (0.2512) time: 0.4388 data: 0.0043 max mem: 22448 +train: [12] [200/400] eta: 0:01:31 lr: 0.000150 loss: 2.4395 (2.4121) grad: 0.2509 (0.2518) time: 0.4364 data: 0.0044 max mem: 22448 +train: [12] [220/400] eta: 0:01:22 lr: 0.000149 loss: 2.4469 (2.4152) grad: 0.2504 (0.2514) time: 0.4404 data: 0.0043 max mem: 22448 +train: [12] [240/400] eta: 0:01:13 lr: 0.000147 loss: 2.4172 (2.4135) grad: 0.2553 (0.2532) time: 0.4526 data: 0.0044 max mem: 22448 +train: [12] [260/400] eta: 0:01:03 lr: 0.000145 loss: 2.3890 (2.4141) grad: 0.2591 (0.2533) time: 0.4355 data: 0.0043 max mem: 22448 +train: [12] [280/400] eta: 0:00:54 lr: 0.000144 loss: 2.4010 (2.4115) grad: 0.2519 (0.2535) time: 0.4416 data: 0.0043 max mem: 22448 +train: [12] [300/400] eta: 0:00:45 lr: 0.000142 loss: 2.4209 (2.4154) grad: 0.2553 (0.2544) time: 0.4450 data: 0.0044 max mem: 22448 +train: [12] [320/400] eta: 0:00:36 lr: 0.000141 loss: 2.4363 (2.4163) grad: 0.2553 (0.2543) time: 0.4319 data: 0.0041 max mem: 22448 +train: [12] [340/400] eta: 0:00:27 lr: 0.000139 loss: 2.4190 (2.4158) grad: 0.2523 (0.2542) time: 0.4373 data: 0.0041 max mem: 22448 +train: [12] [360/400] eta: 0:00:18 lr: 0.000138 loss: 2.4140 (2.4164) grad: 0.2493 (0.2543) time: 0.4475 data: 0.0042 max mem: 22448 +train: [12] [380/400] eta: 0:00:09 lr: 0.000136 loss: 2.4140 (2.4174) grad: 0.2545 (0.2542) time: 0.4442 data: 0.0042 max mem: 22448 +train: [12] [399/400] eta: 0:00:00 lr: 0.000134 loss: 2.4102 (2.4172) grad: 0.2575 (0.2548) time: 0.4359 data: 0.0041 max mem: 22448 +train: [12] Total time: 0:03:00 (0.4502 s / it) +train: [12] Summary: lr: 0.000134 loss: 2.4102 (2.4172) grad: 0.2575 (0.2548) +eval (validation): [12] [ 0/85] eta: 0:05:37 time: 3.9660 data: 3.6795 max mem: 22448 +eval (validation): [12] [20/85] eta: 0:00:33 time: 0.3506 data: 0.0045 max mem: 22448 +eval (validation): [12] [40/85] eta: 0:00:19 time: 0.3274 data: 0.0035 max mem: 22448 +eval (validation): [12] [60/85] eta: 0:00:10 time: 0.3558 data: 0.0045 max mem: 22448 +eval (validation): [12] [80/85] eta: 0:00:01 time: 0.3451 data: 0.0042 max mem: 22448 +eval (validation): [12] [84/85] eta: 0:00:00 time: 0.3323 data: 0.0039 max mem: 22448 +eval (validation): [12] Total time: 0:00:33 (0.3900 s / it) +cv: [12] best hparam: (0.38, 1.0) (018) ('018_lr3.8e-01_wd1.0e+00') loss: 2.544 acc: 0.240 f1: 0.172 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [13] [ 0/400] eta: 0:22:22 lr: nan time: 3.3560 data: 3.0291 max mem: 22448 +train: [13] [ 20/400] eta: 0:03:33 lr: 0.000133 loss: 2.3628 (2.3745) grad: 0.2518 (0.2562) time: 0.4229 data: 0.0044 max mem: 22448 +train: [13] [ 40/400] eta: 0:02:58 lr: 0.000131 loss: 2.3724 (2.3729) grad: 0.2539 (0.2544) time: 0.4282 data: 0.0039 max mem: 22448 +train: [13] [ 60/400] eta: 0:02:43 lr: 0.000130 loss: 2.3724 (2.3760) grad: 0.2508 (0.2531) time: 0.4463 data: 0.0044 max mem: 22448 +train: [13] [ 80/400] eta: 0:02:31 lr: 0.000128 loss: 2.3718 (2.3766) grad: 0.2502 (0.2540) time: 0.4492 data: 0.0043 max mem: 22448 +train: [13] [100/400] eta: 0:02:20 lr: 0.000127 loss: 2.3537 (2.3763) grad: 0.2494 (0.2529) time: 0.4492 data: 0.0042 max mem: 22448 +train: [13] [120/400] eta: 0:02:10 lr: 0.000125 loss: 2.3402 (2.3699) grad: 0.2500 (0.2531) time: 0.4460 data: 0.0042 max mem: 22448 +train: [13] [140/400] eta: 0:02:00 lr: 0.000124 loss: 2.3979 (2.3794) grad: 0.2590 (0.2553) time: 0.4478 data: 0.0042 max mem: 22448 +train: [13] [160/400] eta: 0:01:50 lr: 0.000122 loss: 2.3882 (2.3783) grad: 0.2657 (0.2565) time: 0.4530 data: 0.0043 max mem: 22448 +train: [13] [180/400] eta: 0:01:40 lr: 0.000120 loss: 2.3882 (2.3853) grad: 0.2657 (0.2574) time: 0.4377 data: 0.0042 max mem: 22448 +train: [13] [200/400] eta: 0:01:31 lr: 0.000119 loss: 2.3689 (2.3803) grad: 0.2576 (0.2575) time: 0.4375 data: 0.0041 max mem: 22448 +train: [13] [220/400] eta: 0:01:21 lr: 0.000117 loss: 2.3689 (2.3804) grad: 0.2576 (0.2583) time: 0.4421 data: 0.0039 max mem: 22448 +train: [13] [240/400] eta: 0:01:12 lr: 0.000116 loss: 2.3931 (2.3804) grad: 0.2574 (0.2584) time: 0.4558 data: 0.0039 max mem: 22448 +train: [13] [260/400] eta: 0:01:03 lr: 0.000114 loss: 2.3876 (2.3829) grad: 0.2529 (0.2577) time: 0.4491 data: 0.0041 max mem: 22448 +train: [13] [280/400] eta: 0:00:54 lr: 0.000113 loss: 2.3876 (2.3823) grad: 0.2451 (0.2570) time: 0.4431 data: 0.0040 max mem: 22448 +train: [13] [300/400] eta: 0:00:45 lr: 0.000111 loss: 2.3546 (2.3816) grad: 0.2438 (0.2560) time: 0.4399 data: 0.0040 max mem: 22448 +train: [13] [320/400] eta: 0:00:36 lr: 0.000110 loss: 2.3567 (2.3811) grad: 0.2534 (0.2565) time: 0.4410 data: 0.0040 max mem: 22448 +train: [13] [340/400] eta: 0:00:27 lr: 0.000108 loss: 2.3688 (2.3812) grad: 0.2581 (0.2562) time: 0.4498 data: 0.0041 max mem: 22448 +train: [13] [360/400] eta: 0:00:18 lr: 0.000107 loss: 2.3760 (2.3811) grad: 0.2582 (0.2566) time: 0.4458 data: 0.0043 max mem: 22448 +train: [13] [380/400] eta: 0:00:09 lr: 0.000105 loss: 2.3857 (2.3818) grad: 0.2640 (0.2571) time: 0.4440 data: 0.0042 max mem: 22448 +train: [13] [399/400] eta: 0:00:00 lr: 0.000104 loss: 2.4062 (2.3825) grad: 0.2579 (0.2573) time: 0.4312 data: 0.0042 max mem: 22448 +train: [13] Total time: 0:03:00 (0.4506 s / it) +train: [13] Summary: lr: 0.000104 loss: 2.4062 (2.3825) grad: 0.2579 (0.2573) +eval (validation): [13] [ 0/85] eta: 0:04:35 time: 3.2362 data: 2.9606 max mem: 22448 +eval (validation): [13] [20/85] eta: 0:00:32 time: 0.3685 data: 0.0052 max mem: 22448 +eval (validation): [13] [40/85] eta: 0:00:18 time: 0.3284 data: 0.0041 max mem: 22448 +eval (validation): [13] [60/85] eta: 0:00:09 time: 0.3341 data: 0.0043 max mem: 22448 +eval (validation): [13] [80/85] eta: 0:00:01 time: 0.3409 data: 0.0044 max mem: 22448 +eval (validation): [13] [84/85] eta: 0:00:00 time: 0.3326 data: 0.0039 max mem: 22448 +eval (validation): [13] Total time: 0:00:32 (0.3798 s / it) +cv: [13] best hparam: (0.38, 1.0) (018) ('018_lr3.8e-01_wd1.0e+00') loss: 2.518 acc: 0.244 f1: 0.177 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [14] [ 0/400] eta: 0:22:42 lr: nan time: 3.4074 data: 3.0365 max mem: 22448 +train: [14] [ 20/400] eta: 0:03:37 lr: 0.000102 loss: 2.3012 (2.3091) grad: 0.2486 (0.2489) time: 0.4318 data: 0.0034 max mem: 22448 +train: [14] [ 40/400] eta: 0:03:01 lr: 0.000101 loss: 2.3012 (2.3134) grad: 0.2417 (0.2485) time: 0.4294 data: 0.0044 max mem: 22448 +train: [14] [ 60/400] eta: 0:02:42 lr: 0.000099 loss: 2.3324 (2.3203) grad: 0.2500 (0.2503) time: 0.4302 data: 0.0042 max mem: 22448 +train: [14] [ 80/400] eta: 0:02:30 lr: 0.000098 loss: 2.3371 (2.3308) grad: 0.2445 (0.2491) time: 0.4374 data: 0.0044 max mem: 22448 +train: [14] [100/400] eta: 0:02:19 lr: 0.000096 loss: 2.3423 (2.3356) grad: 0.2425 (0.2494) time: 0.4481 data: 0.0043 max mem: 22448 +train: [14] [120/400] eta: 0:02:09 lr: 0.000095 loss: 2.3073 (2.3322) grad: 0.2556 (0.2503) time: 0.4417 data: 0.0042 max mem: 22448 +train: [14] [140/400] eta: 0:01:58 lr: 0.000093 loss: 2.2938 (2.3332) grad: 0.2614 (0.2520) time: 0.4339 data: 0.0042 max mem: 22448 +train: [14] [160/400] eta: 0:01:49 lr: 0.000092 loss: 2.2866 (2.3319) grad: 0.2543 (0.2523) time: 0.4496 data: 0.0041 max mem: 22448 +train: [14] [180/400] eta: 0:01:40 lr: 0.000090 loss: 2.2980 (2.3310) grad: 0.2543 (0.2532) time: 0.4495 data: 0.0043 max mem: 22448 +train: [14] [200/400] eta: 0:01:30 lr: 0.000089 loss: 2.2836 (2.3289) grad: 0.2573 (0.2538) time: 0.4479 data: 0.0044 max mem: 22448 +train: [14] [220/400] eta: 0:01:21 lr: 0.000088 loss: 2.3129 (2.3309) grad: 0.2573 (0.2545) time: 0.4399 data: 0.0043 max mem: 22448 +train: [14] [240/400] eta: 0:01:12 lr: 0.000086 loss: 2.3369 (2.3346) grad: 0.2552 (0.2544) time: 0.4431 data: 0.0041 max mem: 22448 +train: [14] [260/400] eta: 0:01:03 lr: 0.000085 loss: 2.3506 (2.3350) grad: 0.2513 (0.2542) time: 0.4564 data: 0.0042 max mem: 22448 +train: [14] [280/400] eta: 0:00:54 lr: 0.000083 loss: 2.3541 (2.3355) grad: 0.2511 (0.2536) time: 0.4525 data: 0.0042 max mem: 22448 +train: [14] [300/400] eta: 0:00:45 lr: 0.000082 loss: 2.3629 (2.3398) grad: 0.2507 (0.2539) time: 0.4520 data: 0.0042 max mem: 22448 +train: [14] [320/400] eta: 0:00:36 lr: 0.000081 loss: 2.3656 (2.3410) grad: 0.2578 (0.2544) time: 0.4437 data: 0.0043 max mem: 22448 +train: [14] [340/400] eta: 0:00:27 lr: 0.000079 loss: 2.3340 (2.3418) grad: 0.2628 (0.2551) time: 0.4356 data: 0.0042 max mem: 22448 +train: [14] [360/400] eta: 0:00:18 lr: 0.000078 loss: 2.3501 (2.3422) grad: 0.2577 (0.2553) time: 0.4494 data: 0.0043 max mem: 22448 +train: [14] [380/400] eta: 0:00:09 lr: 0.000076 loss: 2.3331 (2.3405) grad: 0.2577 (0.2555) time: 0.4532 data: 0.0044 max mem: 22448 +train: [14] [399/400] eta: 0:00:00 lr: 0.000075 loss: 2.3331 (2.3423) grad: 0.2643 (0.2562) time: 0.4382 data: 0.0043 max mem: 22448 +train: [14] Total time: 0:03:00 (0.4508 s / it) +train: [14] Summary: lr: 0.000075 loss: 2.3331 (2.3423) grad: 0.2643 (0.2562) +eval (validation): [14] [ 0/85] eta: 0:04:37 time: 3.2664 data: 2.9797 max mem: 22448 +eval (validation): [14] [20/85] eta: 0:00:32 time: 0.3630 data: 0.0038 max mem: 22448 +eval (validation): [14] [40/85] eta: 0:00:18 time: 0.3369 data: 0.0038 max mem: 22448 +eval (validation): [14] [60/85] eta: 0:00:09 time: 0.3272 data: 0.0040 max mem: 22448 +eval (validation): [14] [80/85] eta: 0:00:01 time: 0.3497 data: 0.0042 max mem: 22448 +eval (validation): [14] [84/85] eta: 0:00:00 time: 0.3426 data: 0.0041 max mem: 22448 +eval (validation): [14] Total time: 0:00:32 (0.3804 s / it) +cv: [14] best hparam: (0.32, 1.0) (017) ('017_lr3.2e-01_wd1.0e+00') loss: 2.520 acc: 0.248 f1: 0.173 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [15] [ 0/400] eta: 0:22:27 lr: nan time: 3.3697 data: 3.0137 max mem: 22448 +train: [15] [ 20/400] eta: 0:03:34 lr: 0.000074 loss: 2.2950 (2.3233) grad: 0.2413 (0.2493) time: 0.4231 data: 0.0040 max mem: 22448 +train: [15] [ 40/400] eta: 0:02:58 lr: 0.000072 loss: 2.3146 (2.3224) grad: 0.2413 (0.2515) time: 0.4268 data: 0.0039 max mem: 22448 +train: [15] [ 60/400] eta: 0:02:41 lr: 0.000071 loss: 2.3072 (2.3132) grad: 0.2566 (0.2529) time: 0.4282 data: 0.0041 max mem: 22448 +train: [15] [ 80/400] eta: 0:02:28 lr: 0.000070 loss: 2.2743 (2.3047) grad: 0.2499 (0.2518) time: 0.4309 data: 0.0042 max mem: 22448 +train: [15] [100/400] eta: 0:02:17 lr: 0.000068 loss: 2.2686 (2.2978) grad: 0.2486 (0.2522) time: 0.4380 data: 0.0041 max mem: 22448 +train: [15] [120/400] eta: 0:02:07 lr: 0.000067 loss: 2.2940 (2.3029) grad: 0.2498 (0.2533) time: 0.4446 data: 0.0041 max mem: 22448 +train: [15] [140/400] eta: 0:01:58 lr: 0.000066 loss: 2.3310 (2.3083) grad: 0.2571 (0.2548) time: 0.4395 data: 0.0042 max mem: 22448 +train: [15] [160/400] eta: 0:01:48 lr: 0.000064 loss: 2.3132 (2.3069) grad: 0.2548 (0.2545) time: 0.4279 data: 0.0042 max mem: 22448 +train: [15] [180/400] eta: 0:01:39 lr: 0.000063 loss: 2.3132 (2.3141) grad: 0.2538 (0.2550) time: 0.4496 data: 0.0043 max mem: 22448 +train: [15] [200/400] eta: 0:01:29 lr: 0.000062 loss: 2.3099 (2.3116) grad: 0.2538 (0.2546) time: 0.4335 data: 0.0043 max mem: 22448 +train: [15] [220/400] eta: 0:01:20 lr: 0.000061 loss: 2.3099 (2.3112) grad: 0.2490 (0.2546) time: 0.4407 data: 0.0043 max mem: 22448 +train: [15] [240/400] eta: 0:01:11 lr: 0.000059 loss: 2.3120 (2.3105) grad: 0.2573 (0.2551) time: 0.4374 data: 0.0040 max mem: 22448 +train: [15] [260/400] eta: 0:01:02 lr: 0.000058 loss: 2.3501 (2.3148) grad: 0.2585 (0.2551) time: 0.4391 data: 0.0042 max mem: 22448 +train: [15] [280/400] eta: 0:00:53 lr: 0.000057 loss: 2.3227 (2.3123) grad: 0.2480 (0.2544) time: 0.4409 data: 0.0043 max mem: 22448 +train: [15] [300/400] eta: 0:00:44 lr: 0.000056 loss: 2.3009 (2.3122) grad: 0.2446 (0.2543) time: 0.4450 data: 0.0043 max mem: 22448 +train: [15] [320/400] eta: 0:00:35 lr: 0.000054 loss: 2.3098 (2.3135) grad: 0.2494 (0.2544) time: 0.4467 data: 0.0042 max mem: 22448 +train: [15] [340/400] eta: 0:00:26 lr: 0.000053 loss: 2.3227 (2.3137) grad: 0.2492 (0.2543) time: 0.4488 data: 0.0043 max mem: 22448 +train: [15] [360/400] eta: 0:00:17 lr: 0.000052 loss: 2.3070 (2.3145) grad: 0.2506 (0.2548) time: 0.4269 data: 0.0042 max mem: 22448 +train: [15] [380/400] eta: 0:00:08 lr: 0.000051 loss: 2.2806 (2.3123) grad: 0.2500 (0.2545) time: 0.4495 data: 0.0041 max mem: 22448 +train: [15] [399/400] eta: 0:00:00 lr: 0.000050 loss: 2.2755 (2.3132) grad: 0.2457 (0.2544) time: 0.4560 data: 0.0044 max mem: 22448 +train: [15] Total time: 0:02:58 (0.4462 s / it) +train: [15] Summary: lr: 0.000050 loss: 2.2755 (2.3132) grad: 0.2457 (0.2544) +eval (validation): [15] [ 0/85] eta: 0:04:37 time: 3.2679 data: 3.0439 max mem: 22448 +eval (validation): [15] [20/85] eta: 0:00:30 time: 0.3352 data: 0.0038 max mem: 22448 +eval (validation): [15] [40/85] eta: 0:00:18 time: 0.3662 data: 0.0038 max mem: 22448 +eval (validation): [15] [60/85] eta: 0:00:09 time: 0.3428 data: 0.0044 max mem: 22448 +eval (validation): [15] [80/85] eta: 0:00:01 time: 0.3263 data: 0.0040 max mem: 22448 +eval (validation): [15] [84/85] eta: 0:00:00 time: 0.3230 data: 0.0040 max mem: 22448 +eval (validation): [15] Total time: 0:00:32 (0.3790 s / it) +cv: [15] best hparam: (0.32, 1.0) (017) ('017_lr3.2e-01_wd1.0e+00') loss: 2.518 acc: 0.250 f1: 0.174 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [16] [ 0/400] eta: 0:22:05 lr: nan time: 3.3132 data: 2.9959 max mem: 22448 +train: [16] [ 20/400] eta: 0:03:30 lr: 0.000048 loss: 2.2646 (2.2892) grad: 0.2353 (0.2428) time: 0.4172 data: 0.0037 max mem: 22448 +train: [16] [ 40/400] eta: 0:02:58 lr: 0.000047 loss: 2.2646 (2.2813) grad: 0.2370 (0.2424) time: 0.4323 data: 0.0044 max mem: 22448 +train: [16] [ 60/400] eta: 0:02:42 lr: 0.000046 loss: 2.2630 (2.2739) grad: 0.2422 (0.2442) time: 0.4381 data: 0.0043 max mem: 22448 +train: [16] [ 80/400] eta: 0:02:29 lr: 0.000045 loss: 2.3090 (2.2849) grad: 0.2464 (0.2462) time: 0.4415 data: 0.0042 max mem: 22448 +train: [16] [100/400] eta: 0:02:18 lr: 0.000044 loss: 2.3037 (2.2812) grad: 0.2512 (0.2474) time: 0.4374 data: 0.0042 max mem: 22448 +train: [16] [120/400] eta: 0:02:08 lr: 0.000043 loss: 2.2629 (2.2830) grad: 0.2512 (0.2481) time: 0.4533 data: 0.0042 max mem: 22448 +train: [16] [140/400] eta: 0:01:59 lr: 0.000042 loss: 2.2629 (2.2812) grad: 0.2468 (0.2473) time: 0.4502 data: 0.0041 max mem: 22448 +train: [16] [160/400] eta: 0:01:49 lr: 0.000041 loss: 2.3127 (2.2865) grad: 0.2479 (0.2487) time: 0.4360 data: 0.0041 max mem: 22448 +train: [16] [180/400] eta: 0:01:40 lr: 0.000040 loss: 2.3187 (2.2871) grad: 0.2513 (0.2486) time: 0.4460 data: 0.0042 max mem: 22448 +train: [16] [200/400] eta: 0:01:30 lr: 0.000039 loss: 2.2565 (2.2831) grad: 0.2449 (0.2480) time: 0.4366 data: 0.0042 max mem: 22448 +train: [16] [220/400] eta: 0:01:21 lr: 0.000038 loss: 2.2492 (2.2818) grad: 0.2366 (0.2475) time: 0.4425 data: 0.0044 max mem: 22448 +train: [16] [240/400] eta: 0:01:11 lr: 0.000036 loss: 2.2774 (2.2819) grad: 0.2436 (0.2479) time: 0.4224 data: 0.0043 max mem: 22448 +train: [16] [260/400] eta: 0:01:02 lr: 0.000035 loss: 2.2892 (2.2846) grad: 0.2554 (0.2486) time: 0.4508 data: 0.0043 max mem: 22448 +train: [16] [280/400] eta: 0:00:53 lr: 0.000034 loss: 2.3000 (2.2855) grad: 0.2514 (0.2486) time: 0.4408 data: 0.0043 max mem: 22448 +train: [16] [300/400] eta: 0:00:44 lr: 0.000033 loss: 2.3000 (2.2874) grad: 0.2479 (0.2487) time: 0.4367 data: 0.0045 max mem: 22448 +train: [16] [320/400] eta: 0:00:35 lr: 0.000032 loss: 2.3293 (2.2912) grad: 0.2501 (0.2491) time: 0.4518 data: 0.0044 max mem: 22448 +train: [16] [340/400] eta: 0:00:26 lr: 0.000031 loss: 2.2988 (2.2910) grad: 0.2508 (0.2493) time: 0.4445 data: 0.0043 max mem: 22448 +train: [16] [360/400] eta: 0:00:17 lr: 0.000031 loss: 2.2721 (2.2906) grad: 0.2452 (0.2494) time: 0.4354 data: 0.0043 max mem: 22448 +train: [16] [380/400] eta: 0:00:08 lr: 0.000030 loss: 2.2763 (2.2900) grad: 0.2527 (0.2499) time: 0.4434 data: 0.0041 max mem: 22448 +train: [16] [399/400] eta: 0:00:00 lr: 0.000029 loss: 2.2848 (2.2903) grad: 0.2554 (0.2503) time: 0.4464 data: 0.0042 max mem: 22448 +train: [16] Total time: 0:02:59 (0.4476 s / it) +train: [16] Summary: lr: 0.000029 loss: 2.2848 (2.2903) grad: 0.2554 (0.2503) +eval (validation): [16] [ 0/85] eta: 0:04:37 time: 3.2626 data: 2.9978 max mem: 22448 +eval (validation): [16] [20/85] eta: 0:00:31 time: 0.3535 data: 0.0038 max mem: 22448 +eval (validation): [16] [40/85] eta: 0:00:19 time: 0.3514 data: 0.0041 max mem: 22448 +eval (validation): [16] [60/85] eta: 0:00:09 time: 0.3406 data: 0.0044 max mem: 22448 +eval (validation): [16] [80/85] eta: 0:00:01 time: 0.3254 data: 0.0042 max mem: 22448 +eval (validation): [16] [84/85] eta: 0:00:00 time: 0.3235 data: 0.0042 max mem: 22448 +eval (validation): [16] Total time: 0:00:32 (0.3795 s / it) +cv: [16] best hparam: (0.38, 1.0) (018) ('018_lr3.8e-01_wd1.0e+00') loss: 2.523 acc: 0.246 f1: 0.179 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [17] [ 0/400] eta: 0:22:39 lr: nan time: 3.3994 data: 3.0276 max mem: 22448 +train: [17] [ 20/400] eta: 0:03:43 lr: 0.000028 loss: 2.2005 (2.2366) grad: 0.2286 (0.2373) time: 0.4464 data: 0.0051 max mem: 22448 +train: [17] [ 40/400] eta: 0:03:04 lr: 0.000027 loss: 2.2514 (2.2609) grad: 0.2387 (0.2410) time: 0.4341 data: 0.0045 max mem: 22448 +train: [17] [ 60/400] eta: 0:02:44 lr: 0.000026 loss: 2.2800 (2.2742) grad: 0.2381 (0.2405) time: 0.4278 data: 0.0044 max mem: 22448 +train: [17] [ 80/400] eta: 0:02:31 lr: 0.000025 loss: 2.2593 (2.2636) grad: 0.2381 (0.2411) time: 0.4410 data: 0.0042 max mem: 22448 +train: [17] [100/400] eta: 0:02:20 lr: 0.000024 loss: 2.2574 (2.2697) grad: 0.2424 (0.2412) time: 0.4401 data: 0.0042 max mem: 22448 +train: [17] [120/400] eta: 0:02:10 lr: 0.000023 loss: 2.2884 (2.2686) grad: 0.2424 (0.2417) time: 0.4670 data: 0.0043 max mem: 22448 +train: [17] [140/400] eta: 0:02:00 lr: 0.000023 loss: 2.2779 (2.2678) grad: 0.2429 (0.2421) time: 0.4485 data: 0.0044 max mem: 22448 +train: [17] [160/400] eta: 0:01:50 lr: 0.000022 loss: 2.2616 (2.2670) grad: 0.2425 (0.2430) time: 0.4344 data: 0.0040 max mem: 22448 +train: [17] [180/400] eta: 0:01:41 lr: 0.000021 loss: 2.2438 (2.2652) grad: 0.2405 (0.2425) time: 0.4510 data: 0.0042 max mem: 22448 +train: [17] [200/400] eta: 0:01:31 lr: 0.000020 loss: 2.2336 (2.2656) grad: 0.2405 (0.2424) time: 0.4464 data: 0.0042 max mem: 22448 +train: [17] [220/400] eta: 0:01:22 lr: 0.000019 loss: 2.2623 (2.2653) grad: 0.2459 (0.2432) time: 0.4369 data: 0.0040 max mem: 22448 +train: [17] [240/400] eta: 0:01:12 lr: 0.000019 loss: 2.2701 (2.2648) grad: 0.2505 (0.2439) time: 0.4353 data: 0.0041 max mem: 22448 +train: [17] [260/400] eta: 0:01:03 lr: 0.000018 loss: 2.2852 (2.2658) grad: 0.2505 (0.2442) time: 0.4465 data: 0.0040 max mem: 22448 +train: [17] [280/400] eta: 0:00:54 lr: 0.000017 loss: 2.2756 (2.2670) grad: 0.2464 (0.2442) time: 0.4455 data: 0.0042 max mem: 22448 +train: [17] [300/400] eta: 0:00:45 lr: 0.000016 loss: 2.2756 (2.2680) grad: 0.2449 (0.2440) time: 0.4299 data: 0.0041 max mem: 22448 +train: [17] [320/400] eta: 0:00:36 lr: 0.000016 loss: 2.2645 (2.2683) grad: 0.2356 (0.2434) time: 0.4400 data: 0.0041 max mem: 22448 +train: [17] [340/400] eta: 0:00:27 lr: 0.000015 loss: 2.2480 (2.2684) grad: 0.2376 (0.2437) time: 0.4444 data: 0.0042 max mem: 22448 +train: [17] [360/400] eta: 0:00:17 lr: 0.000014 loss: 2.2588 (2.2689) grad: 0.2414 (0.2437) time: 0.4358 data: 0.0043 max mem: 22448 +train: [17] [380/400] eta: 0:00:08 lr: 0.000014 loss: 2.2588 (2.2672) grad: 0.2434 (0.2445) time: 0.4403 data: 0.0042 max mem: 22448 +train: [17] [399/400] eta: 0:00:00 lr: 0.000013 loss: 2.2332 (2.2662) grad: 0.2490 (0.2442) time: 0.4559 data: 0.0042 max mem: 22448 +train: [17] Total time: 0:03:00 (0.4500 s / it) +train: [17] Summary: lr: 0.000013 loss: 2.2332 (2.2662) grad: 0.2490 (0.2442) +eval (validation): [17] [ 0/85] eta: 0:04:40 time: 3.2942 data: 3.0585 max mem: 22448 +eval (validation): [17] [20/85] eta: 0:00:32 time: 0.3662 data: 0.0392 max mem: 22448 +eval (validation): [17] [40/85] eta: 0:00:19 time: 0.3521 data: 0.0039 max mem: 22448 +eval (validation): [17] [60/85] eta: 0:00:10 time: 0.3767 data: 0.0046 max mem: 22448 +eval (validation): [17] [80/85] eta: 0:00:01 time: 0.3316 data: 0.0042 max mem: 22448 +eval (validation): [17] [84/85] eta: 0:00:00 time: 0.3291 data: 0.0040 max mem: 22448 +eval (validation): [17] Total time: 0:00:33 (0.3935 s / it) +cv: [17] best hparam: (0.32, 1.0) (017) ('017_lr3.2e-01_wd1.0e+00') loss: 2.512 acc: 0.252 f1: 0.179 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [18] [ 0/400] eta: 0:22:06 lr: nan time: 3.3165 data: 2.9946 max mem: 22448 +train: [18] [ 20/400] eta: 0:03:34 lr: 0.000012 loss: 2.2785 (2.3164) grad: 0.2457 (0.2459) time: 0.4261 data: 0.0045 max mem: 22448 +train: [18] [ 40/400] eta: 0:02:58 lr: 0.000012 loss: 2.2618 (2.2597) grad: 0.2402 (0.2439) time: 0.4244 data: 0.0041 max mem: 22448 +train: [18] [ 60/400] eta: 0:02:41 lr: 0.000011 loss: 2.2174 (2.2522) grad: 0.2470 (0.2472) time: 0.4316 data: 0.0042 max mem: 22448 +train: [18] [ 80/400] eta: 0:02:28 lr: 0.000011 loss: 2.2242 (2.2498) grad: 0.2388 (0.2441) time: 0.4297 data: 0.0044 max mem: 22448 +train: [18] [100/400] eta: 0:02:17 lr: 0.000010 loss: 2.2357 (2.2491) grad: 0.2336 (0.2429) time: 0.4335 data: 0.0042 max mem: 22448 +train: [18] [120/400] eta: 0:02:07 lr: 0.000009 loss: 2.2357 (2.2448) grad: 0.2421 (0.2425) time: 0.4520 data: 0.0043 max mem: 22448 +train: [18] [140/400] eta: 0:01:58 lr: 0.000009 loss: 2.2461 (2.2487) grad: 0.2424 (0.2428) time: 0.4440 data: 0.0044 max mem: 22448 +train: [18] [160/400] eta: 0:01:48 lr: 0.000008 loss: 2.2586 (2.2498) grad: 0.2382 (0.2420) time: 0.4287 data: 0.0043 max mem: 22448 +train: [18] [180/400] eta: 0:01:39 lr: 0.000008 loss: 2.2495 (2.2494) grad: 0.2403 (0.2426) time: 0.4432 data: 0.0043 max mem: 22448 +train: [18] [200/400] eta: 0:01:29 lr: 0.000007 loss: 2.2659 (2.2531) grad: 0.2427 (0.2425) time: 0.4365 data: 0.0043 max mem: 22448 +train: [18] [220/400] eta: 0:01:20 lr: 0.000007 loss: 2.2573 (2.2530) grad: 0.2434 (0.2426) time: 0.4436 data: 0.0042 max mem: 22448 +train: [18] [240/400] eta: 0:01:11 lr: 0.000006 loss: 2.2465 (2.2549) grad: 0.2423 (0.2424) time: 0.4387 data: 0.0044 max mem: 22448 +train: [18] [260/400] eta: 0:01:02 lr: 0.000006 loss: 2.2465 (2.2553) grad: 0.2397 (0.2423) time: 0.4375 data: 0.0043 max mem: 22448 +train: [18] [280/400] eta: 0:00:53 lr: 0.000006 loss: 2.2433 (2.2548) grad: 0.2407 (0.2423) time: 0.4483 data: 0.0043 max mem: 22448 +train: [18] [300/400] eta: 0:00:44 lr: 0.000005 loss: 2.2580 (2.2552) grad: 0.2389 (0.2420) time: 0.4335 data: 0.0042 max mem: 22448 +train: [18] [320/400] eta: 0:00:35 lr: 0.000005 loss: 2.2788 (2.2563) grad: 0.2389 (0.2419) time: 0.4528 data: 0.0044 max mem: 22448 +train: [18] [340/400] eta: 0:00:26 lr: 0.000004 loss: 2.2310 (2.2533) grad: 0.2404 (0.2417) time: 0.4623 data: 0.0043 max mem: 22448 +train: [18] [360/400] eta: 0:00:17 lr: 0.000004 loss: 2.2025 (2.2524) grad: 0.2404 (0.2416) time: 0.4429 data: 0.0045 max mem: 22448 +train: [18] [380/400] eta: 0:00:08 lr: 0.000004 loss: 2.2396 (2.2524) grad: 0.2410 (0.2418) time: 0.4391 data: 0.0043 max mem: 22448 +train: [18] [399/400] eta: 0:00:00 lr: 0.000003 loss: 2.2320 (2.2503) grad: 0.2416 (0.2420) time: 0.4560 data: 0.0042 max mem: 22448 +train: [18] Total time: 0:02:59 (0.4476 s / it) +train: [18] Summary: lr: 0.000003 loss: 2.2320 (2.2503) grad: 0.2416 (0.2420) +eval (validation): [18] [ 0/85] eta: 0:04:51 time: 3.4250 data: 3.1459 max mem: 22448 +eval (validation): [18] [20/85] eta: 0:00:31 time: 0.3336 data: 0.0039 max mem: 22448 +eval (validation): [18] [40/85] eta: 0:00:18 time: 0.3465 data: 0.0038 max mem: 22448 +eval (validation): [18] [60/85] eta: 0:00:09 time: 0.3535 data: 0.0043 max mem: 22448 +eval (validation): [18] [80/85] eta: 0:00:01 time: 0.3263 data: 0.0041 max mem: 22448 +eval (validation): [18] [84/85] eta: 0:00:00 time: 0.3192 data: 0.0039 max mem: 22448 +eval (validation): [18] Total time: 0:00:32 (0.3789 s / it) +cv: [18] best hparam: (0.32, 1.0) (017) ('017_lr3.2e-01_wd1.0e+00') loss: 2.517 acc: 0.250 f1: 0.177 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [19] [ 0/400] eta: 0:23:04 lr: nan time: 3.4622 data: 3.0876 max mem: 22448 +train: [19] [ 20/400] eta: 0:03:44 lr: 0.000003 loss: 2.2759 (2.2952) grad: 0.2303 (0.2376) time: 0.4464 data: 0.0040 max mem: 22448 +train: [19] [ 40/400] eta: 0:03:03 lr: 0.000003 loss: 2.2416 (2.2590) grad: 0.2358 (0.2393) time: 0.4278 data: 0.0042 max mem: 22448 +train: [19] [ 60/400] eta: 0:02:44 lr: 0.000002 loss: 2.2300 (2.2675) grad: 0.2382 (0.2383) time: 0.4302 data: 0.0044 max mem: 22448 +train: [19] [ 80/400] eta: 0:02:31 lr: 0.000002 loss: 2.2657 (2.2680) grad: 0.2337 (0.2376) time: 0.4422 data: 0.0043 max mem: 22448 +train: [19] [100/400] eta: 0:02:20 lr: 0.000002 loss: 2.2541 (2.2630) grad: 0.2352 (0.2375) time: 0.4387 data: 0.0042 max mem: 22448 +train: [19] [120/400] eta: 0:02:09 lr: 0.000002 loss: 2.2562 (2.2655) grad: 0.2377 (0.2387) time: 0.4440 data: 0.0042 max mem: 22448 +train: [19] [140/400] eta: 0:01:59 lr: 0.000001 loss: 2.2641 (2.2599) grad: 0.2377 (0.2377) time: 0.4459 data: 0.0043 max mem: 22448 +train: [19] [160/400] eta: 0:01:49 lr: 0.000001 loss: 2.2132 (2.2528) grad: 0.2272 (0.2369) time: 0.4366 data: 0.0043 max mem: 22448 +train: [19] [180/400] eta: 0:01:40 lr: 0.000001 loss: 2.2048 (2.2493) grad: 0.2389 (0.2382) time: 0.4411 data: 0.0041 max mem: 22448 +train: [19] [200/400] eta: 0:01:31 lr: 0.000001 loss: 2.2399 (2.2505) grad: 0.2389 (0.2381) time: 0.4503 data: 0.0043 max mem: 22448 +train: [19] [220/400] eta: 0:01:21 lr: 0.000001 loss: 2.2399 (2.2485) grad: 0.2322 (0.2376) time: 0.4436 data: 0.0042 max mem: 22448 +train: [19] [240/400] eta: 0:01:12 lr: 0.000001 loss: 2.2329 (2.2477) grad: 0.2321 (0.2379) time: 0.4424 data: 0.0043 max mem: 22448 +train: [19] [260/400] eta: 0:01:03 lr: 0.000000 loss: 2.2202 (2.2458) grad: 0.2343 (0.2378) time: 0.4340 data: 0.0042 max mem: 22448 +train: [19] [280/400] eta: 0:00:54 lr: 0.000000 loss: 2.2404 (2.2473) grad: 0.2379 (0.2378) time: 0.4408 data: 0.0042 max mem: 22448 +train: [19] [300/400] eta: 0:00:44 lr: 0.000000 loss: 2.2712 (2.2472) grad: 0.2403 (0.2375) time: 0.4352 data: 0.0041 max mem: 22448 +train: [19] [320/400] eta: 0:00:35 lr: 0.000000 loss: 2.2486 (2.2485) grad: 0.2393 (0.2378) time: 0.4357 data: 0.0041 max mem: 22448 +train: [19] [340/400] eta: 0:00:26 lr: 0.000000 loss: 2.2667 (2.2512) grad: 0.2350 (0.2377) time: 0.4498 data: 0.0041 max mem: 22448 +train: [19] [360/400] eta: 0:00:17 lr: 0.000000 loss: 2.2667 (2.2509) grad: 0.2307 (0.2374) time: 0.4438 data: 0.0041 max mem: 22448 +train: [19] [380/400] eta: 0:00:08 lr: 0.000000 loss: 2.2255 (2.2501) grad: 0.2293 (0.2373) time: 0.4314 data: 0.0043 max mem: 22448 +train: [19] [399/400] eta: 0:00:00 lr: 0.000000 loss: 2.2499 (2.2521) grad: 0.2394 (0.2375) time: 0.4426 data: 0.0043 max mem: 22448 +train: [19] Total time: 0:02:59 (0.4480 s / it) +train: [19] Summary: lr: 0.000000 loss: 2.2499 (2.2521) grad: 0.2394 (0.2375) +eval (validation): [19] [ 0/85] eta: 0:04:54 time: 3.4664 data: 3.2052 max mem: 22448 +eval (validation): [19] [20/85] eta: 0:00:32 time: 0.3439 data: 0.0036 max mem: 22448 +eval (validation): [19] [40/85] eta: 0:00:18 time: 0.3452 data: 0.0036 max mem: 22448 +eval (validation): [19] [60/85] eta: 0:00:09 time: 0.3564 data: 0.0042 max mem: 22448 +eval (validation): [19] [80/85] eta: 0:00:01 time: 0.3346 data: 0.0041 max mem: 22448 +eval (validation): [19] [84/85] eta: 0:00:00 time: 0.3243 data: 0.0039 max mem: 22448 +eval (validation): [19] Total time: 0:00:32 (0.3831 s / it) +cv: [19] best hparam: (0.32, 1.0) (017) ('017_lr3.2e-01_wd1.0e+00') loss: 2.516 acc: 0.250 f1: 0.177 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +evaluating last checkpoint: experiments/data_scaling/output/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +eval model info: +{"score": 0.2502768549280177, "hparam": [0.32, 1.0], "hparam_id": 17, "epoch": 19, "is_best": false, "best_score": 0.25212255444813586} +eval (train): [20] [ 0/509] eta: 0:25:45 time: 3.0362 data: 2.7978 max mem: 22448 +eval (train): [20] [ 20/509] eta: 0:04:11 time: 0.3891 data: 0.0047 max mem: 22448 +eval (train): [20] [ 40/509] eta: 0:03:30 time: 0.3796 data: 0.0042 max mem: 22448 +eval (train): [20] [ 60/509] eta: 0:03:11 time: 0.3790 data: 0.0050 max mem: 22448 +eval (train): [20] [ 80/509] eta: 0:02:54 time: 0.3507 data: 0.0040 max mem: 22448 +eval (train): [20] [100/509] eta: 0:02:41 time: 0.3396 data: 0.0045 max mem: 22448 +eval (train): [20] [120/509] eta: 0:02:30 time: 0.3440 data: 0.0044 max mem: 22448 +eval (train): [20] [140/509] eta: 0:02:19 time: 0.3327 data: 0.0040 max mem: 22448 +eval (train): [20] [160/509] eta: 0:02:11 time: 0.3557 data: 0.0043 max mem: 22448 +eval (train): [20] [180/509] eta: 0:02:03 time: 0.3716 data: 0.0045 max mem: 22448 +eval (train): [20] [200/509] eta: 0:01:55 time: 0.3622 data: 0.0044 max mem: 22448 +eval (train): [20] [220/509] eta: 0:01:46 time: 0.3343 data: 0.0040 max mem: 22448 +eval (train): [20] [240/509] eta: 0:01:38 time: 0.3393 data: 0.0041 max mem: 22448 +eval (train): [20] [260/509] eta: 0:01:30 time: 0.3397 data: 0.0038 max mem: 22448 +eval (train): [20] [280/509] eta: 0:01:23 time: 0.3821 data: 0.0046 max mem: 22448 +eval (train): [20] [300/509] eta: 0:01:16 time: 0.3489 data: 0.0039 max mem: 22448 +eval (train): [20] [320/509] eta: 0:01:08 time: 0.3348 data: 0.0042 max mem: 22448 +eval (train): [20] [340/509] eta: 0:01:01 time: 0.3509 data: 0.0042 max mem: 22448 +eval (train): [20] [360/509] eta: 0:00:54 time: 0.3763 data: 0.0045 max mem: 22448 +eval (train): [20] [380/509] eta: 0:00:46 time: 0.3617 data: 0.0047 max mem: 22448 +eval (train): [20] [400/509] eta: 0:00:39 time: 0.3556 data: 0.0043 max mem: 22448 +eval (train): [20] [420/509] eta: 0:00:32 time: 0.3490 data: 0.0043 max mem: 22448 +eval (train): [20] [440/509] eta: 0:00:25 time: 0.4014 data: 0.0045 max mem: 22448 +eval (train): [20] [460/509] eta: 0:00:17 time: 0.4183 data: 0.0046 max mem: 22448 +eval (train): [20] [480/509] eta: 0:00:10 time: 0.3540 data: 0.0044 max mem: 22448 +eval (train): [20] [500/509] eta: 0:00:03 time: 0.3367 data: 0.0039 max mem: 22448 +eval (train): [20] [508/509] eta: 0:00:00 time: 0.3272 data: 0.0038 max mem: 22448 +eval (train): [20] Total time: 0:03:06 (0.3659 s / it) +eval (validation): [20] [ 0/85] eta: 0:04:19 time: 3.0579 data: 2.7758 max mem: 22448 +eval (validation): [20] [20/85] eta: 0:00:31 time: 0.3604 data: 0.0046 max mem: 22448 +eval (validation): [20] [40/85] eta: 0:00:19 time: 0.3746 data: 0.0040 max mem: 22448 +eval (validation): [20] [60/85] eta: 0:00:10 time: 0.3909 data: 0.0044 max mem: 22448 +eval (validation): [20] [80/85] eta: 0:00:02 time: 0.3549 data: 0.0045 max mem: 22448 +eval (validation): [20] [84/85] eta: 0:00:00 time: 0.3476 data: 0.0042 max mem: 22448 +eval (validation): [20] Total time: 0:00:34 (0.4037 s / it) +eval (test): [20] [ 0/85] eta: 0:04:23 time: 3.0986 data: 2.8337 max mem: 22448 +eval (test): [20] [20/85] eta: 0:00:30 time: 0.3458 data: 0.0081 max mem: 22448 +eval (test): [20] [40/85] eta: 0:00:19 time: 0.3789 data: 0.0043 max mem: 22448 +eval (test): [20] [60/85] eta: 0:00:10 time: 0.3468 data: 0.0037 max mem: 22448 +eval (test): [20] [80/85] eta: 0:00:01 time: 0.3495 data: 0.0043 max mem: 22448 +eval (test): [20] [84/85] eta: 0:00:00 time: 0.3333 data: 0.0040 max mem: 22448 +eval (test): [20] Total time: 0:00:32 (0.3877 s / it) +eval (testid): [20] [ 0/82] eta: 0:03:59 time: 2.9223 data: 2.6627 max mem: 22448 +eval (testid): [20] [20/82] eta: 0:00:31 time: 0.3876 data: 0.0051 max mem: 22448 +eval (testid): [20] [40/82] eta: 0:00:18 time: 0.3508 data: 0.0039 max mem: 22448 +eval (testid): [20] [60/82] eta: 0:00:09 time: 0.3685 data: 0.0046 max mem: 22448 +eval (testid): [20] [80/82] eta: 0:00:00 time: 0.3287 data: 0.0042 max mem: 22448 +eval (testid): [20] [81/82] eta: 0:00:00 time: 0.3188 data: 0.0040 max mem: 22448 +eval (testid): [20] Total time: 0:00:32 (0.3914 s / it) +evaluating best checkpoint: experiments/data_scaling/output/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +eval model info: +{"score": 0.25212255444813586, "hparam": [0.32, 1.0], "hparam_id": 17, "epoch": 17, "is_best": true, "best_score": 0.25212255444813586} +eval (train): [20] [ 0/509] eta: 0:25:41 time: 3.0279 data: 2.7896 max mem: 22448 +eval (train): [20] [ 20/509] eta: 0:04:11 time: 0.3884 data: 0.0054 max mem: 22448 +eval (train): [20] [ 40/509] eta: 0:03:25 time: 0.3586 data: 0.0035 max mem: 22448 +eval (train): [20] [ 60/509] eta: 0:03:05 time: 0.3613 data: 0.0041 max mem: 22448 +eval (train): [20] [ 80/509] eta: 0:02:49 time: 0.3430 data: 0.0041 max mem: 22448 +eval (train): [20] [100/509] eta: 0:02:37 time: 0.3414 data: 0.0042 max mem: 22448 +eval (train): [20] [120/509] eta: 0:02:26 time: 0.3380 data: 0.0040 max mem: 22448 +eval (train): [20] [140/509] eta: 0:02:17 time: 0.3400 data: 0.0040 max mem: 22448 +eval (train): [20] [160/509] eta: 0:02:08 time: 0.3462 data: 0.0041 max mem: 22448 +eval (train): [20] [180/509] eta: 0:02:00 time: 0.3356 data: 0.0042 max mem: 22448 +eval (train): [20] [200/509] eta: 0:01:52 time: 0.3422 data: 0.0041 max mem: 22448 +eval (train): [20] [220/509] eta: 0:01:45 time: 0.3907 data: 0.0046 max mem: 22448 +eval (train): [20] [240/509] eta: 0:01:37 time: 0.3402 data: 0.0041 max mem: 22448 +eval (train): [20] [260/509] eta: 0:01:30 time: 0.3415 data: 0.0044 max mem: 22448 +eval (train): [20] [280/509] eta: 0:01:22 time: 0.3653 data: 0.0044 max mem: 22448 +eval (train): [20] [300/509] eta: 0:01:15 time: 0.3534 data: 0.0044 max mem: 22448 +eval (train): [20] [320/509] eta: 0:01:08 time: 0.3644 data: 0.0044 max mem: 22448 +eval (train): [20] [340/509] eta: 0:01:00 time: 0.3454 data: 0.0043 max mem: 22448 +eval (train): [20] [360/509] eta: 0:00:53 time: 0.3471 data: 0.0044 max mem: 22448 +eval (train): [20] [380/509] eta: 0:00:46 time: 0.3562 data: 0.0041 max mem: 22448 +eval (train): [20] [400/509] eta: 0:00:39 time: 0.3817 data: 0.0044 max mem: 22448 +eval (train): [20] [420/509] eta: 0:00:32 time: 0.3525 data: 0.0040 max mem: 22448 +eval (train): [20] [440/509] eta: 0:00:24 time: 0.3566 data: 0.0045 max mem: 22448 +eval (train): [20] [460/509] eta: 0:00:17 time: 0.3645 data: 0.0042 max mem: 22448 +eval (train): [20] [480/509] eta: 0:00:10 time: 0.3531 data: 0.0048 max mem: 22448 +eval (train): [20] [500/509] eta: 0:00:03 time: 0.3394 data: 0.0042 max mem: 22448 +eval (train): [20] [508/509] eta: 0:00:00 time: 0.3270 data: 0.0041 max mem: 22448 +eval (train): [20] Total time: 0:03:03 (0.3598 s / it) +eval (validation): [20] [ 0/85] eta: 0:04:27 time: 3.1484 data: 2.8737 max mem: 22448 +eval (validation): [20] [20/85] eta: 0:00:34 time: 0.4056 data: 0.0051 max mem: 22448 +eval (validation): [20] [40/85] eta: 0:00:20 time: 0.3593 data: 0.0038 max mem: 22448 +eval (validation): [20] [60/85] eta: 0:00:10 time: 0.3673 data: 0.0044 max mem: 22448 +eval (validation): [20] [80/85] eta: 0:00:02 time: 0.3508 data: 0.0041 max mem: 22448 +eval (validation): [20] [84/85] eta: 0:00:00 time: 0.3343 data: 0.0039 max mem: 22448 +eval (validation): [20] Total time: 0:00:34 (0.4038 s / it) +eval (test): [20] [ 0/85] eta: 0:04:17 time: 3.0316 data: 2.7612 max mem: 22448 +eval (test): [20] [20/85] eta: 0:00:35 time: 0.4219 data: 0.0057 max mem: 22448 +eval (test): [20] [40/85] eta: 0:00:21 time: 0.3868 data: 0.0040 max mem: 22448 +eval (test): [20] [60/85] eta: 0:00:10 time: 0.3378 data: 0.0043 max mem: 22448 +eval (test): [20] [80/85] eta: 0:00:02 time: 0.3469 data: 0.0044 max mem: 22448 +eval (test): [20] [84/85] eta: 0:00:00 time: 0.3285 data: 0.0042 max mem: 22448 +eval (test): [20] Total time: 0:00:34 (0.4040 s / it) +eval (testid): [20] [ 0/82] eta: 0:04:11 time: 3.0726 data: 2.7813 max mem: 22448 +eval (testid): [20] [20/82] eta: 0:00:31 time: 0.3824 data: 0.0045 max mem: 22448 +eval (testid): [20] [40/82] eta: 0:00:18 time: 0.3601 data: 0.0043 max mem: 22448 +eval (testid): [20] [60/82] eta: 0:00:09 time: 0.3679 data: 0.0049 max mem: 22448 +eval (testid): [20] [80/82] eta: 0:00:00 time: 0.3379 data: 0.0042 max mem: 22448 +eval (testid): [20] [81/82] eta: 0:00:00 time: 0.3280 data: 0.0041 max mem: 22448 +eval (testid): [20] Total time: 0:00:32 (0.3962 s / it) +eval results: + +| model | repr | clf | dataset | ckpt | epoch | lr | wd | hparam_id | hparam | split | loss | acc | acc_std | f1 | f1_std | +|:---------|:-------|:------|:-------------|:-------|--------:|--------:|-----:|------------:|:------------|:-----------|-------:|--------:|----------:|--------:|----------:| +| flat_mae | patch | attn | nsd_cococlip | best | 17 | 9.6e-05 | 0.05 | 17 | [0.32, 1.0] | train | 2.2802 | 0.3174 | 0.002172 | 0.24997 | 0.0021745 | +| flat_mae | patch | attn | nsd_cococlip | best | 17 | 9.6e-05 | 0.05 | 17 | [0.32, 1.0] | validation | 2.5121 | 0.25212 | 0.0055583 | 0.17889 | 0.0046179 | +| flat_mae | patch | attn | nsd_cococlip | best | 17 | 9.6e-05 | 0.05 | 17 | [0.32, 1.0] | test | 2.4678 | 0.25918 | 0.0051236 | 0.18568 | 0.0046254 | +| flat_mae | patch | attn | nsd_cococlip | best | 17 | 9.6e-05 | 0.05 | 17 | [0.32, 1.0] | testid | 2.439 | 0.26142 | 0.0052534 | 0.1982 | 0.0047833 | + + +done! total time: 1:23:26 diff --git a/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/train_log.json b/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/train_log.json new file mode 100644 index 0000000000000000000000000000000000000000..65f0c652b50c117442673ca4e98708fde1190faa --- /dev/null +++ b/data_scaling/n100_2/eval_v2/nsd_cococlip__patch__attn/train_log.json @@ -0,0 +1,20 @@ +{"epoch": 0, "train/lr": 2.987987987987988e-05, "train/loss": 3.15309507727623, "train/grad": 0.1617125478759408, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.1935888671875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.19269775390625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.19136962890625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.190018310546875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.188779296875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.18701416015625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.185224609375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.1833056640625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.180830078125, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.178505859375, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.17636962890625, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.1734716796875, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.171011962890625, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.16770263671875, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.165093994140625, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.162939453125, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.1605322265625, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.1581591796875, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.155902099609375, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.154136962890625, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.1523193359375, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.15075927734375, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.1493896484375, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.148052978515625, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.1469384765625, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.1459033203125, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.145230712890625, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.1448095703125, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.144383544921875, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.144132080078125, "train/loss_030_lr2.7e+00_wd1.0e+00": 3.144036865234375, "train/loss_031_lr3.1e+00_wd1.0e+00": 3.144122314453125, "train/loss_032_lr3.7e+00_wd1.0e+00": 3.14433349609375, "train/loss_033_lr4.3e+00_wd1.0e+00": 3.144656982421875, "train/loss_034_lr5.1e+00_wd1.0e+00": 3.14514404296875, "train/loss_035_lr6.0e+00_wd1.0e+00": 3.145750732421875, "train/loss_036_lr7.1e+00_wd1.0e+00": 3.146412353515625, "train/loss_037_lr8.3e+00_wd1.0e+00": 3.1471661376953124, "train/loss_038_lr9.8e+00_wd1.0e+00": 3.1480950927734375, "train/loss_039_lr1.2e+01_wd1.0e+00": 3.1491802978515624, "train/loss_040_lr1.4e+01_wd1.0e+00": 3.149678955078125, "train/loss_041_lr1.6e+01_wd1.0e+00": 3.149630126953125, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.1482257080078124, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.1401495361328124, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.121376953125, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.1000852966308594, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.0860333251953125, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.0833159637451173, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.075694732666016, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02545104713179171, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.025382286803796887, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02527159536257386, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02516405559144914, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02506097286939621, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02492182838730514, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.024771182099357247, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.024611369790509343, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02441392747219652, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02421820000279695, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02403979405295104, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.023795730220153927, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.023585785101167857, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.023319884832017124, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02310392000246793, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.022925745826214552, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.02273760635405779, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.022554647629149258, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.02238910349085927, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.022263298705220223, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.022136773495003582, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.022031141985207794, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.021936292094178498, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.021855787942185997, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.021789181502535938, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.021727299210615458, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.021684814984910192, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.02165454374626279, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.02162500225473195, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.02159681568853557, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.02157680309843272, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.021557514383457602, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.02152236210182309, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.021477605486288666, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.021403703200630845, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.021301835011690855, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.02116555301938206, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0210137457260862, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.02084992010612041, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0206941542821005, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.02064758670050651, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.020688574681989848, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.02099142729770392, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.021908735372126103, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.022788883023895323, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.023913655295036734, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.024893415006808937, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.02636064407415688, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.028140277611091735, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.186004400253296, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1833839416503906, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1791951656341553, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.175279378890991, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1716160774230957, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1668455600738525, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.161957263946533, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1570308208465576, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.15138840675354, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1462225914001465, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1419754028320312, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.136837959289551, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1330056190490723, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1291592121124268, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.126777172088623, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.1253464221954346, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.1242663860321045, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.1236610412597656, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.123370885848999, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.123319149017334, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.123396873474121, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.1236183643341064, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.124124765396118, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.125028371810913, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.1261990070343018, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.127981662750244, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.129605293273926, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.131117820739746, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.1329238414764404, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.134486198425293, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.1348648071289062, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.1341869831085205, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.1325387954711914, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.1309444904327393, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.1292786598205566, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.128095865249634, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.1275508403778076, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.127329111099243, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.1262147426605225, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.122668743133545, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.1138458251953125, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.094193696975708, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.956632375717163, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.854504108428955, "validation/loss_044_lr2.6e+01_wd1.0e+00": 2.7584080696105957, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.7672691345214844, "validation/loss_046_lr3.6e+01_wd1.0e+00": 2.7190001010894775, "validation/loss_047_lr4.3e+01_wd1.0e+00": 2.8530890941619873, "validation/loss_048_lr5.0e+01_wd1.0e+00": 2.8127927780151367, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.04891103728313031, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.05149501661129568, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.05444813584348468, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.05334071613141381, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.056109265411590996, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.05758582502768549, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.05758582502768549, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.060538944259874494, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06016980435585087, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06330749354005168, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06496862310815799, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06441491325212255, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06662975267626431, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.0651531930601698, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.06736803248431156, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.0664451827242525, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.06847545219638243, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.067921742340347, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.067921742340347, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.07050572166851236, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.07013658176448874, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.0710594315245478, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.07013658176448874, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.06681432262827612, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.060723514211886306, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.055001845699520116, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.05278700627537837, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.0518641565153193, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.0518641565153193, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.0518641565153193, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.05352528608342562, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.05370985603543743, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.05887781469176818, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.06626061277224068, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.07216685123661867, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.07272056109265411, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.07438169066076043, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.07050572166851236, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.0695828719084533, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.06939830195644149, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.07788851974898486, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.08656330749354005, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.11701734957548911, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.14857881136950904, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.1788482834994463, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.18456995201181248, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.20284237726098192, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.17201919527500922, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.16758951642672573, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.0077962568107435735, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.008238522052188503, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.00943583757645799, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.009492753455925028, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.010700159529642253, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.011736660315471686, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.011807542279049595, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.012497857200130379, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.011921569088344268, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.011926802310923778, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.010896578821482093, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.011545146443800997, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.013241519663750085, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.014413860440959507, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.016067258283318526, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.015682441485535756, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.015196665985567653, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.01379333506671665, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.013260647024147013, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.01381055104346797, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.014092113758942823, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.015019607100787069, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.015823608857627545, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.01760454180405074, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.01808157522306579, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.013597982745731948, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.009801577166632202, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.007207832188711295, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.005676960824636056, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.005355707725513178, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.007197892540890763, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.009541025767056177, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.012839075739989667, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.01499570179180707, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.016802143941635445, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.0189837078882048, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.024797909596872572, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.025014277151624287, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.021858345193211226, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.019785747466645735, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.02753880395135676, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.034385789554644476, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.061893881847690374, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.08221476045269112, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.10938206007770701, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.10477579098108579, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.12198457423449148, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.10698678222779279, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.10428766120665782, "id_best": 46, "lr_best": 0.010799999999999999, "wd_best": 0.05, "train/loss_best": 3.0860333251953125, "validation/loss_best": 2.7190001010894775, "validation/acc_best": 0.20284237726098192, "validation/f1_best": 0.12198457423449148} +{"epoch": 1, "train/lr": 8.993993993993998e-05, "train/loss": 3.0229961478710177, "train/grad": 0.184772320240736, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.1643017578125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.16089599609375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.156004638671875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.15186767578125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.1484716796875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.144478759765625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.1410302734375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.1380419921875, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.1351904296875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.133211669921875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.13169677734375, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.13042724609375, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.1295166015625, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.128990478515625, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.128636474609375, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.1283740234375, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.12818603515625, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.1280322265625, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.127794189453125, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.127515869140625, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.127298583984375, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.126888427734375, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.126627197265625, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.126241455078125, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.12603759765625, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.12562744140625, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.125357666015625, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.125067138671875, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.124571533203125, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.1234130859375, "train/loss_030_lr2.7e+00_wd1.0e+00": 3.1211041259765624, "train/loss_031_lr3.1e+00_wd1.0e+00": 3.11694580078125, "train/loss_032_lr3.7e+00_wd1.0e+00": 3.1052386474609377, "train/loss_033_lr4.3e+00_wd1.0e+00": 3.077204284667969, "train/loss_034_lr5.1e+00_wd1.0e+00": 3.03253173828125, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.983046875, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.92612060546875, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.8582948303222655, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.7963166427612305, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.7494002532958985, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.7246106338500975, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.700285081863403, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.665644426345825, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.666615967750549, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.6631852912902834, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.679124746322632, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.6966728019714354, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.754835250377655, "train/loss_048_lr5.0e+01_wd1.0e+00": 2.789841549396515, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02284742477349937, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.022556077279150485, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.022133709406480195, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02177816047333181, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.021481028236448766, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.021143407174386085, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.020847531417384744, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02060135826934129, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02037521556019783, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.020215262789279223, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02011474810540676, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02003237303812057, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.019995551872998476, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.01998302286490798, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.01999295954592526, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.020009550205431877, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.020035305870696903, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.020067165698856117, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.020099152834154664, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.02012713773176074, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.02015793033409864, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.020185773568227885, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.020209830487146974, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.020223114895634352, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.020220015933737158, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.02017700198572129, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.020099599887616934, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0199976022541523, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.019819885063916444, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.019598244735971094, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.01944912337232381, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.019379314854741096, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0194194177351892, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.02039309224113822, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.021672148308716714, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.023157211262732746, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.02492213200312108, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.02747989257797599, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0294598153186962, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.03214176395442337, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.03388642120640725, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.03558705440722406, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.037530466774478556, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.03805080562829971, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.03952252528630197, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0406692576687783, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.04210494476370513, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.046554008582606915, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.050914477529004215, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1490907669067383, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.144784927368164, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.139104127883911, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.134909152984619, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.131844997406006, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1289315223693848, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1268861293792725, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1255550384521484, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1247036457061768, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1242868900299072, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1241464614868164, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.124054193496704, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1239776611328125, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1237902641296387, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1235437393188477, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.123213529586792, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.122803211212158, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.122292995452881, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.121860980987549, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.121638774871826, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.121661901473999, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.1220412254333496, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.122756242752075, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.1238887310028076, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.125730276107788, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.1289730072021484, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.132673978805542, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.1359448432922363, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.1384284496307373, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.1352477073669434, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.121631622314453, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.0978145599365234, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.9951345920562744, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.795114755630493, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.6995739936828613, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.6635327339172363, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.6289920806884766, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.5880587100982666, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.595391035079956, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.5969042778015137, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.677018404006958, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.666470766067505, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.6548519134521484, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.7377655506134033, "validation/loss_044_lr2.6e+01_wd1.0e+00": 2.7185535430908203, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.767439126968384, "validation/loss_046_lr3.6e+01_wd1.0e+00": 2.807405471801758, "validation/loss_047_lr4.3e+01_wd1.0e+00": 2.912670850753784, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1380538940429688, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.0636766334440753, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06312292358803986, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06496862310815799, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.0664451827242525, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06441491325212255, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06386120339608711, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06386120339608711, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06256921373200443, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06275378368401624, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06201550387596899, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.062200073827980804, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.062384643779992616, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06164636397194537, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.062384643779992616, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.062200073827980804, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.06201550387596899, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.062200073827980804, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.06256921373200443, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.06275378368401624, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.062384643779992616, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.062200073827980804, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.06423034330011074, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.06441491325212255, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.06533776301218161, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.06533776301218161, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.06459948320413436, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.06607604282022887, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.06589147286821706, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.06847545219638243, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.07050572166851236, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.08157991878922112, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.09892949427833149, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.1242155777039498, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.1539313399778516, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.18678479143595422, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.19785898855666298, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.20450350682908822, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.21686969361387967, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2235142118863049, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.22554448135843486, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.21391657438169065, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.21188630490956073, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2129937246216316, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.20837947582133629, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.20837947582133629, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.19435215946843853, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.1925064599483204, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.17644887412329271, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.15559246954595793, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.012834694692848545, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.012363164418648537, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.01216204135818926, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.012616208733827716, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.01164314226690655, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.011540031484729147, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.011073595683531064, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.010816866895130691, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.011201155625557319, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.010873261212917073, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.011109146678757524, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.011368631168419754, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.010907072759057697, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.011530520162172047, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.011153632049836432, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.011135983161984412, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.011999246999917479, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.012401041299962707, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.013385192738235935, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.01433023479867271, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.01625682437154625, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.0178834451096461, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.017780456904987054, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.017021030903971685, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.01603621621079926, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.013861275950336269, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.013967229777757412, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.014301900177758271, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.01549984388849576, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.020958281886295487, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.031646921047330745, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.045586135165021836, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.0560191834078268, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.08295695612012978, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.10833303180999794, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.126739219489316, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.12690827054587783, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.14337396936163693, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.1555601991461469, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.15415356813618497, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.14390976263764246, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1433106139600275, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.142477718698486, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.12746666303924895, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.1352044106337146, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.13322518252403306, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.121175091622163, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.12005754995762304, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.09426917365957717, "id_best": 39, "lr_best": 0.0036, "wd_best": 0.05, "train/loss_best": 2.7494002532958985, "validation/loss_best": 2.5969042778015137, "validation/acc_best": 0.22554448135843486, "validation/f1_best": 0.15415356813618497} +{"epoch": 2, "train/lr": 0.00014999999999999988, "train/loss": 3.032779140472412, "train/grad": 0.40525465734302996, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.14148681640625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.138851318359375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.135877685546875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.134124755859375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.1330126953125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.13209228515625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.13157958984375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.131273193359375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.131055908203125, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.130760498046875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.13058349609375, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.13032470703125, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.130040283203125, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.12952392578125, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.12906005859375, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.128656005859375, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.128062744140625, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.127406005859375, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.12678955078125, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.12628662109375, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.125599365234375, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.124892578125, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.123790283203125, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.122103271484375, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.1192205810546874, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.112593994140625, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.1024114990234377, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.0790325927734377, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.008408203125, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.9139788818359373, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.8284136962890627, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.7568843460083006, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.6519525146484373, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.592987461090088, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.543149166107178, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.503723268508911, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.485233564376831, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.4674496936798094, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.4503439617156983, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.4579476356506347, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.4771661233901976, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.49411612033844, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.5125575923919676, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.5693609523773193, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.6015904259681704, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.724470751285553, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.4930622744560242, "train/loss_047_lr4.3e+01_wd1.0e+00": 4.877021948099136, "train/loss_048_lr5.0e+01_wd1.0e+00": 5.659870491027832, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020809552748687565, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02059273668564856, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020356511170975863, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.0202260983036831, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02015433494001627, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.020112441354431212, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02010213993024081, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02011074044276029, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02013015475589782, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.020152273303829134, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.020171658964827657, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.020196364275179804, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.0202174762962386, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.020242607109248637, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.020262883831746875, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.020278391856700183, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.02029514658264816, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.02030666300561279, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.020308313104324044, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.020294940592721106, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.020257052765227854, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.02019328431226313, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.020088485539890826, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.019951347531750797, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.01983302291482687, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.019846264105290175, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.020059562851674855, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.020649594287388027, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.022327839168719946, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.024753091149032114, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.027017258466221394, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.028787441486492753, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.031329666776582596, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.03285663715563714, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.03457892251200974, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0359168238658458, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.03654152725823224, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0387486285623163, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.04016067762859166, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.04252895140089095, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.043839982561767105, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04378988546319306, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04557898888364434, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.04757487013936043, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.05043273670598865, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.05612168615683913, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.09833147887140513, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.1280130442418158, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.12954416211694478, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1293160915374756, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1275269985198975, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1258633136749268, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1250579357147217, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1246886253356934, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1244781017303467, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1243972778320312, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1243960857391357, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1244289875030518, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1244895458221436, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1246120929718018, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.124837875366211, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1250901222229004, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.12557053565979, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1260504722595215, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.1264657974243164, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.126842498779297, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.126970052719116, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.126568078994751, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.125701427459717, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.1239261627197266, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.121245861053467, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.11704158782959, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.1098883152008057, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.0980286598205566, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.079451560974121, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.0481951236724854, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.877397298812866, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.714128255844116, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.61279034614563, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.5737810134887695, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.5639443397521973, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.556040048599243, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.5766773223876953, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.5669333934783936, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.616501808166504, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.623284101486206, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.6088602542877197, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.613129138946533, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.6520659923553467, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.6807665824890137, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.714221477508545, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.698514938354492, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.7458550930023193, "validation/loss_044_lr2.6e+01_wd1.0e+00": 2.8143680095672607, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.954310894012451, "validation/loss_046_lr3.6e+01_wd1.0e+00": 29.378328323364258, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06386120339608711, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.062384643779992616, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06386120339608711, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06626061277224068, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.067921742340347, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06736803248431156, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06884459210040605, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06939830195644149, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.0695828719084533, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06976744186046512, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06976744186046512, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.06866002214839424, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.06773717238833518, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.06773717238833518, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.06681432262827612, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.06662975267626431, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.0681063122923588, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.0681063122923588, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.0681063122923588, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.06736803248431156, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.07032115171650055, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.07715023994093761, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.08324104835732743, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.09523809523809523, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.11221853082318199, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.14101144333702473, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.1921373200442968, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.21631598375784422, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.22739018087855298, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.23237356958287192, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.2397563676633444, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.23772609819121446, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.23735695828719083, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.2294204503506829, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.21631598375784422, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.21963824289405684, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.22296050203026946, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.2174234034699151, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.2157622739018088, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.20985603543743078, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.21668512366186785, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.2059800664451827, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.2100406053894426, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.1984126984126984, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.051310446659283866, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.011324502146800102, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.010235710386570998, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.010491250863873864, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.01127089141280429, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.010793636688030883, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.01085039070866872, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.011314611710876055, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.011531668329035315, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.012184820112822577, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.012605594955681955, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.013027524346977149, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.01317704665599534, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.013440526981278442, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.01302196642540422, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.012966622877367375, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.012864270866396398, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.012517105434292457, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.01228454544825254, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.01249744641977337, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.012300337805613742, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.012345400873127901, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.012572029477277993, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.014458738038854757, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.017399090502434916, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.02130426124048676, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.031126143352759945, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.04492938262401139, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.06760489885640697, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.12596335174557605, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.14743492111232342, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.15074049846891127, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.15895463058055714, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.16766855835775493, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.16749199419425043, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.1714287389591985, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.15977974786002588, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.14884032637885694, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.15531346314391792, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.1596618758621822, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.1700210973500065, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.15503312904536595, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.13250368127213022, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.1550063129210187, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.1343432225606027, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.1479149084935544, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.12697798191598778, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.005364085676765505, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 32, "lr_best": 0.0011099999999999999, "wd_best": 0.05, "train/loss_best": 2.6519525146484373, "validation/loss_best": 2.556040048599243, "validation/acc_best": 0.2397563676633444, "validation/f1_best": 0.16766855835775493} +{"epoch": 3, "train/lr": 0.0002100600600600602, "train/loss": 2.982503706216812, "train/grad": 0.4207539715617895, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.131639404296875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.13089111328125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.130009765625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.1296533203125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.129227294921875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.128914794921875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.12845947265625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.12800537109375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.127440185546875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.126959228515625, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.1263671875, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.125606689453125, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.12480224609375, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.1238623046875, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.122969970703125, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.122138671875, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.12117919921875, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.119869384765625, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.1182763671875, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.116292724609375, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.112430419921875, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.1051483154296875, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.09117919921875, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.0636578369140626, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.9826760864257813, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.8581735992431643, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.746004791259766, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.658119697570801, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.575370845794678, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.5140856552124022, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.468625545501709, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.4358888149261473, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.399825134277344, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.3890500450134278, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.3674819087982177, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.362183666229248, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.38289466381073, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.3856503915786744, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.390742106437683, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.432715132236481, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.4690805196762087, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.5093892335891725, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.5637019085884094, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.6246026945114136, "train/loss_044_lr2.6e+01_wd1.0e+00": 5.825455764532089, "train/loss_045_lr3.1e+01_wd1.0e+00": 4.69440732717514, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.877829284667969, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02003373946994543, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020010835262946784, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020010209497995673, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020025390908122064, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.020040924632921814, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.020060391710139812, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.020076433215290308, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02009197778534144, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.020108896996825932, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.020124800251796842, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.020136764934286476, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.0201516637718305, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.020162270511500536, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.020168241625651718, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.020164845804683865, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.020152616277337075, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.0201261451235041, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.020078044584952295, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.020002878811210392, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.019921306180767717, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.019848342728801073, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.019904943266883492, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.020188171193003656, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.02083489393349737, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.023148028403520583, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.02653386493213475, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.029010839629918336, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.03089749668724835, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.03263977280817926, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.03412155731581151, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.035517682367935774, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.03688775234855712, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.037828933959826824, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.03825533114373684, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.039971703197807074, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04092879315838218, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04210552820935845, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04356121055781841, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.04527724981307983, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.04830604322254658, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05026097599416971, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05218109486624598, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.05444634361192584, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.056864288467913866, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.18257471395656466, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.12308186063542963, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.02667565941810608, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1248092651367188, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1244425773620605, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1241917610168457, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.123997211456299, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.123872756958008, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1236190795898438, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1233389377593994, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1230101585388184, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.122544288635254, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.12204909324646, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1215689182281494, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1208972930908203, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1203324794769287, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1196510791778564, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1191601753234863, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.1188251972198486, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.118593692779541, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.1183364391326904, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.1175119876861572, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.115326404571533, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.108586311340332, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.094085693359375, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.066530704498291, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.9437074661254883, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.736359119415283, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.6522417068481445, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.6319024562835693, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.6163036823272705, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.605008840560913, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.5990447998046875, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.580280303955078, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.5696117877960205, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.651939630508423, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.6210875511169434, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.6304283142089844, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.6925806999206543, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.794238567352295, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.75305438041687, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.6629934310913086, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.830160617828369, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.856684923171997, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.8731629848480225, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.807450294494629, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.8618903160095215, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06496862310815799, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06736803248431156, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06718346253229975, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06626061277224068, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06681432262827612, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06699889258028793, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06939830195644149, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.07032115171650055, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.07013658176448874, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06902916205241787, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.06847545219638243, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.0695828719084533, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.06939830195644149, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.06902916205241787, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.06921373200442968, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.0695828719084533, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.07050572166851236, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.07364341085271318, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.08379475821336287, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.09966777408637874, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.13049095607235142, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.1716500553709856, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.19232188999630861, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2011812476928756, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.21133259505352528, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.22129937246216316, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.22702104097452935, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.22960502030269472, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.22960502030269472, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.21244001476559615, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.22314507198228128, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.22831303063861202, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.22240679217423404, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.20671834625322996, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2024732373569583, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.22665190107050573, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.21613141380583242, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.20431893687707642, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.2222222222222222, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2087486157253599, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.2081949058693245, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.011869661082746346, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.011321873166908974, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.011495477497551263, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.011706943653185335, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.01154900434271454, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.01141716992573055, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.011986758512220363, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.011766133307449465, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.012507585265198214, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.0142936014267084, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.015081247105877165, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.01547496461810542, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.01533071252338543, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.01631880752391678, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.01809506008056312, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.018648405231196503, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.018179696672960113, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.018048119815546798, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.018679062064003372, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.020752964014274306, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.024679876476355486, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.036002489368860184, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.05282400942633311, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.07179022702998378, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.10401925163614471, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.12373310774302804, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.13312900847739606, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.14268044477407374, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.14974160530445338, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.15573581121448452, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.15666754945712463, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.15357072874239042, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.1421255883060599, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.1492201580558952, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.16298370725909803, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.15886851695354628, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.14215940542930344, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.14282204377195215, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.16077388037626292, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.15178498257862993, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.13678754621246278, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.14627516390836012, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.14731716470854025, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.1419183743511302, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 30, "lr_best": 0.00081, "wd_best": 0.05, "train/loss_best": 2.468625545501709, "validation/loss_best": 2.580280303955078, "validation/acc_best": 0.22960502030269472, "validation/f1_best": 0.15666754945712463} +{"epoch": 4, "train/lr": 0.0002701201201201201, "train/loss": 2.927880413532257, "train/grad": 0.4014571599662304, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.132203369140625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.131898193359375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.131396484375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.130970458984375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.130479736328125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.129967041015625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.129234619140625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.1284765625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.127626953125, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.12677001953125, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.125845947265625, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.12466796875, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.1235498046875, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.121966552734375, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.120299072265625, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.118662109375, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.11608154296875, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.11158447265625, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.102298583984375, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.0878741455078127, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.0602484130859375, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.977608642578125, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.840062561035156, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.697644844055176, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.6017637252807617, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.5242442321777343, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.469605598449707, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.428025093078613, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.384320030212402, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.361396942138672, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.3275847816467286, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.300536775588989, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.291931486129761, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.3060556411743165, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.3011501741409304, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.3072717237472533, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.3420361733436583, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.3633146023750307, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.395570688247681, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.434005262851715, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.5112363123893737, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.5788657379150393, "train/loss_042_lr1.9e+01_wd1.0e+00": 5.7469360268116, "train/loss_043_lr2.2e+01_wd1.0e+00": 4.603498990535736, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.019839732297696172, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.019853223832324147, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.019869252927601336, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.019880164521746336, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019887776072137056, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.019896813267841935, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.019903889526613056, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.019912043516524137, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.019920610138215125, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.019928804328665138, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.01993400024715811, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.019935873942449688, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.01993081578053534, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.01990863929502666, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.019868823373690246, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.01981818990781903, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.019744162522256373, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.01968632738571614, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.019787672017700972, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.02014121904503554, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.02087948243599385, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.023126140870153904, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.026700261430814862, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.029685011208057402, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.03152977400459349, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.03305170911364257, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.034131526099517945, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.035035179574042556, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.03591493302024901, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.036371951000764964, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.03758593276143074, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.03909165528602898, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0406425192207098, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04166914159432054, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04382672496140003, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04455038007348776, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.045241846982389686, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.046828467696905136, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.04976521536707878, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05159347800537944, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05673935992643237, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.06143490659072995, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.18676913101226092, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.1286899609863758, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1245276927948, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.124512195587158, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.124486207962036, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1244397163391113, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.124406576156616, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1243531703948975, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.124237298965454, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1240360736846924, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.123718023300171, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.123269557952881, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1227505207061768, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.121826171875, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1209070682525635, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1195428371429443, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1182596683502197, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.1168925762176514, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.1144914627075195, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.1089906692504883, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.0970613956451416, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.0760858058929443, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.0033674240112305, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.77038836479187, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.648088216781616, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.587078809738159, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.5566928386688232, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.545933246612549, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.5308644771575928, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.530949831008911, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.548924446105957, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.5924222469329834, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.63067364692688, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.6097047328948975, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.6379292011260986, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.689926862716675, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.6665754318237305, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.7007334232330322, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.745063543319702, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.761151075363159, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.8063669204711914, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.8899893760681152, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.989993095397949, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.133425235748291, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06090808416389812, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06109265411590993, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.060723514211886306, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.061461794019933555, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06312292358803986, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06404577334809892, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06441491325212255, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06404577334809892, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06386120339608711, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.0651531930601698, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06570690291620525, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06718346253229975, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.06681432262827612, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.06626061277224068, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.06626061277224068, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.06847545219638243, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.07198228128460686, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.07807308970099668, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.08619416758951642, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.11092654115909929, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.165374677002584, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.20302694721299372, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.22111480251015134, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2353266888150609, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.23458840900701367, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.23846437799926173, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.24197120708748615, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.23329641934293097, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.23311184939091917, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.2129937246216316, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.21982281284606867, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.21926910299003322, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.21502399409376152, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.22406792174234036, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.21483942414174972, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.21059431524547803, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.19952011812476927, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.20228866740494647, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.1788482834994463, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.18660022148394242, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.1786637135474345, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.009456898183473958, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.009895314237478012, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.00986881364320752, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.0102915537551123, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.011429984294667536, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.012467693931707024, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.012949171071005197, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.012684477543165285, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.013375574924039518, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.015456798856665455, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.015685438865057463, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.016744135371641124, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.018125664129295168, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.01805524192726499, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.019140994352888848, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.020277788071291384, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.02133169034542276, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.02379591047285744, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.029631732544162754, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.03834228505447541, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.05965339599774074, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.0983253225091678, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.1264324808482866, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.14252688525789017, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.1569945386772004, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.16141863737351184, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.17107252637679884, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.1788974632604687, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.17392858915892329, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.17458926795439852, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.1578656040334994, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.1647013517457221, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.16861470544658963, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.16446093456527486, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.16994004653956873, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.15905524424309284, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.15254539080706864, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.15149375502350657, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.14312903583063283, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.12909341674546349, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.1381285024857675, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1239650299241742, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 27, "lr_best": 0.00047999999999999996, "wd_best": 0.05, "train/loss_best": 2.428025093078613, "validation/loss_best": 2.530949831008911, "validation/acc_best": 0.24197120708748615, "validation/f1_best": 0.1788974632604687} +{"epoch": 5, "train/lr": 0.00029891323817685334, "train/loss": 2.7724902868270873, "train/grad": 0.22721013203263282, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.125198974609375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.124678955078125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.12400146484375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.12339111328125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.12282470703125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.12200927734375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.12113525390625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.12015380859375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.119005126953125, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.117999267578125, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.116904296875, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.115538330078125, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.113873291015625, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.111328125, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.108046875, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.103145751953125, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.092083740234375, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.0698651123046874, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.03036865234375, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.9218812561035157, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.7517267990112306, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.621546401977539, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.529572525024414, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.45921480178833, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.3941733741760256, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.3406359577178955, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.3003857421875, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.2693173694610596, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.245551128387451, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.243912434577942, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.222011079788208, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.1927216839790344, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.198313090801239, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.2178312397003173, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.209761803150177, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.2335702657699583, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.276154646873474, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.293095500469208, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.3201725673675537, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.393002563714981, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.429607651233673, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.503189774751663, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.171875, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020117867197841408, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020128627796657382, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02014344767201692, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02015425611753017, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.020163951520808042, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.020178190539591013, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.020190846966579558, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.020203438722528517, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02021651538554579, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02022316020913422, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02022443708963692, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02021789495367557, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.020202116370201112, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.020164239625446497, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02013801105786115, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.020173157225362958, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.020440518776886164, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.0211594762140885, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.02213358396664262, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.02439675589092076, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.02898077867925167, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.032200665464624766, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03320092256180942, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0339891098998487, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.035310217486694453, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.03656370443291962, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.03773661815561354, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.038603824730962515, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.039629776943475005, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04046483527868986, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04250155178830028, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04415321296080947, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.045179776679724455, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.045303096119314434, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.047057348135858774, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04817396046593785, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04815689573064447, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04982388462871313, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05142766155302524, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.055829627607017755, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.057422999888658524, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.059424070846289395, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1232588291168213, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1229636669158936, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.122527837753296, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.12209415435791, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1216914653778076, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1211888790130615, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1206114292144775, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.120004415512085, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.119241237640381, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.118429660797119, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.117586612701416, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1162214279174805, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1146998405456543, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1117982864379883, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.107234477996826, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.0996897220611572, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.084268093109131, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.056607484817505, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.9794976711273193, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.7722976207733154, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.643913984298706, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.601292610168457, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.567608594894409, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.544316291809082, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.5409388542175293, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.5346195697784424, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.549741506576538, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.585023880004883, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.6079142093658447, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.6207830905914307, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.7133264541625977, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.6929023265838623, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.651546001434326, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.696748733520508, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.715649366378784, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.8058764934539795, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.7792954444885254, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.795304536819458, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.8805270195007324, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.912611961364746, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.9536845684051514, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.9200830459594727, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06127722406792174, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06164636397194537, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.062200073827980804, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06349206349206349, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.0636766334440753, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06349206349206349, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06293835363602805, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.0636766334440753, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06441491325212255, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06459948320413436, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06589147286821706, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06902916205241787, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.07308970099667775, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.07438169066076043, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.07659653008490218, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.08194905869324474, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.09892949427833149, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.1168327796234773, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.16315983757844224, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.20671834625322996, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.21760797342192692, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.23329641934293097, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2427094868955334, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2440014765596161, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2441860465116279, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.24160206718346253, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2336655592469546, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.23015873015873015, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.2364341085271318, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.21982281284606867, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.23089700996677742, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.22831303063861202, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.2159468438538206, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.21650055370985605, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.2054263565891473, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.20431893687707642, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.19785898855666298, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.18992248062015504, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.18235511258767073, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.1834625322997416, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.1790328534514581, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.009141833540601467, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.009440908290292609, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.00973732882644815, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.010187240722874425, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.009946418004996047, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.009842213783926886, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.009298601613150171, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.010068937281701705, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.010252707006502349, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.010378371147761323, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.010608485911401268, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.011936818044096804, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.013776610292724846, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.01683220202744419, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.021840196774015736, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.025374167910901062, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.030492279391325202, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.0449755437367575, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.056923848791881206, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.09061321434870177, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.12254041550779284, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.1320219390751048, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.1505885029931868, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.16134860202451098, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.16449338303530092, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.16911638742453994, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.16956863746313666, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.16218273236871686, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.16051935845839915, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.16071588741564138, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.155733273555782, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.16818676026867238, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.16881879746027506, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.15250118272727836, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.15881979167313612, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.1510035442618122, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.1570707686477241, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.16134677591311794, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.14940447970807344, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.1427968095625962, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.14133860605813262, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1484413275491299, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 25, "lr_best": 0.00035999999999999997, "wd_best": 0.05, "train/loss_best": 2.3406359577178955, "validation/loss_best": 2.5346195697784424, "validation/acc_best": 0.2441860465116279, "validation/f1_best": 0.16911638742453994} +{"epoch": 6, "train/lr": 0.0002924170901991409, "train/loss": 2.7184387016296387, "train/grad": 0.23448088511824608, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.1289306640625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.128411865234375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.12743408203125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.12657470703125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.12576904296875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.12468505859375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.123475341796875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.122265625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.120692138671875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.119127197265625, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.117442626953125, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.11499755859375, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.112091064453125, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.105860595703125, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.09538330078125, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.0786083984375, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.0478045654296877, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.9728936767578125, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.786612548828125, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.654165153503418, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.54760196685791, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.468311996459961, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.3911519050598145, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.3331062412261963, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.2808283805847167, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.2350579261779786, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.198268747329712, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.1720865392684936, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.1545900011062624, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.1485314702987672, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.1222484064102174, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.0932453751564024, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.104640316963196, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.1387093353271482, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.1534890294075013, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.1743953323364256, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.2135671019554137, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.237367832660675, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.2809974145889282, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.3221029150485992, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.4234569883346557, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.4733936083316803, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.171875, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020080847092904152, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020085354018956423, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020090642664581538, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020094697438180445, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.020097117093391716, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02010171614587307, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.020103587303310633, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.0201018483703956, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02009425835683942, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02008201883174479, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02006236686371267, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.020024763755500316, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.019984520468860865, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.019975219368934632, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02016763708088547, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.02067062184214592, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.02161898960825056, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.02334294082596898, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.02787575435824692, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03152387840673328, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03340134343132377, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03411346736364067, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03509905179962516, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0359066201094538, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.03722046719864011, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.038568154610693454, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.039586066585034135, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.040766260344535114, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04162517752498388, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04168451055884361, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04359623070806265, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04509186629205942, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04583161115646362, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04594304079189897, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.048791353423148394, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04903466274961829, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04882984085008502, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.050295645501464606, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05233210332691669, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05474914724007249, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.06004379045218229, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.060349692907184366, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1227238178253174, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1224172115325928, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1219654083251953, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.121556282043457, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1211719512939453, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.120723247528076, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.120241641998291, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1196701526641846, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.118943691253662, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1181230545043945, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.117176055908203, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.115306854248047, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1124117374420166, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.104139566421509, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.088106632232666, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.064316749572754, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.02856183052063, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.832736015319824, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.6661911010742188, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.5906858444213867, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.5540363788604736, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.5347015857696533, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.5210556983947754, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.521099328994751, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.5359628200531006, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.550571918487549, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.574237585067749, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.5989816188812256, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.6043484210968018, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.6413862705230713, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.6852383613586426, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.7227423191070557, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.7046873569488525, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.767052173614502, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.7072436809539795, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.790917158126831, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.8712618350982666, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.7664594650268555, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.8159966468811035, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.8924646377563477, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.8354580402374268, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.934471607208252, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06275378368401624, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06293835363602805, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.0636766334440753, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06386120339608711, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06404577334809892, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06496862310815799, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06552233296419344, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.0664451827242525, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.0681063122923588, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06755260243632337, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.067921742340347, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.07272056109265411, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.08231819859726837, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.09228497600590624, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.10686600221483943, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.15171650055370986, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.20228866740494647, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.22056109265411591, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.23311184939091917, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.24012550756736803, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.24510889627168697, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.24160206718346253, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.23569582871908454, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.23329641934293097, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.23163528977482467, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.22757475083056478, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.22997416020671835, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.23421926910299004, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.21668512366186785, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2201919527500923, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.2233296419342931, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.21244001476559615, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2216685123661868, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.2102251753414544, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.20044296788482835, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.22000738279808046, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.20524178663713546, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.19010705057216684, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.2009966777408638, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.18327796234772978, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.009341474729595817, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.009627565723106536, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.009971963925502375, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.010195885450978376, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.010865735234945453, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.011416444138274166, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.01256235793273437, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.013667182101805257, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.013454824795651535, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.014185098057826246, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.016391771612414837, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.018559080481124427, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.02109914342838523, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.02729306082353723, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.03558095528716999, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.04565049432787635, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.05667130271117488, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.0883438779109962, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.1293292243437497, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.14496353522856775, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.15968927468236369, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.16894674618154948, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.17864384182446183, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.17734559621241783, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.1717480639314549, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.17107855783757628, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.17099179460084524, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.16652265696592516, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.17387132400465077, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.17549082434150987, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.16946613200479624, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.16948558607746253, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.1721016659607554, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.16100922999083725, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.17030453135375692, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.1599782273779715, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.15803641739913557, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.16860805431422962, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.16166047645193468, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.14369022876214818, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.14766589363325314, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.15726055466320102, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 22, "lr_best": 0.00021599999999999996, "wd_best": 0.05, "train/loss_best": 2.3911519050598145, "validation/loss_best": 2.5210556983947754, "validation/acc_best": 0.24510889627168697, "validation/f1_best": 0.17864384182446183} +{"epoch": 7, "train/lr": 0.00027969249556793234, "train/loss": 2.64948189496994, "train/grad": 0.2377804220467806, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.127781982421875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.12702880859375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.125872802734375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.124844970703125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.1238916015625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.12246337890625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.12107177734375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.119442138671875, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.117386474609375, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.11513671875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.11276123046875, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.108416748046875, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.102171630859375, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.086219482421875, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.0598370361328127, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.0263409423828125, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.9283624267578126, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.719639892578125, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.583172149658203, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.4966984176635743, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.409024143218994, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.340474443435669, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.267430124282837, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.2095979976654054, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.158641147613525, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.110996708869934, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.072798867225647, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.0493551015853884, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.0356894636154177, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.0408084630966186, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.0160611033439637, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.9877433824539184, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.004609307050705, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.0507242465019226, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.046884274482727, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.0690208435058595, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.12220737695694, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.1520379257202147, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.1948506796360014, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.22663577914238, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.2895637309551238, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.31779470205307, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.171875, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.01963072735350579, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.019634210630320014, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.019637817204929887, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.01964003894943744, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019640589612536133, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.01963995317928493, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.01963633661624044, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.019627600708045066, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.01960994739551097, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.0195845733769238, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.019553759316913784, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.01951658899895847, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.019537506932392715, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.019868015381507575, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.020697953696362674, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.021810431443154813, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.024081710120663048, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.028338309600949286, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03176371293142438, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03338314569555223, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03444031541235745, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03525758062489331, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03643309885635972, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03732292233966291, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.03848232071846724, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.039683557022362946, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04091957999393344, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04229099288582802, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04372824098914862, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04517502520233393, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04669017655774951, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04774281373247504, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.047886725589632986, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04818783974274993, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.048784348834306004, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.05006262443959713, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04941419653594494, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05115067629143596, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05315806357190013, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0530207671970129, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05507106559351087, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.054307736549526454, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1223576068878174, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.122044086456299, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.121593475341797, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1212077140808105, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1208696365356445, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1203789710998535, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1199402809143066, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.119431972503662, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1187641620635986, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1179287433624268, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.116835594177246, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1139867305755615, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.108360528945923, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.0911214351654053, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.0631866455078125, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.0309438705444336, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.828242778778076, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.671660900115967, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.5818722248077393, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.5516865253448486, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.5374667644500732, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.5355567932128906, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.541468381881714, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.569856882095337, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.5892577171325684, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.610710382461548, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.6539559364318848, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.6872329711914062, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.704531669616699, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.6998989582061768, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.8024909496307373, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.848137140274048, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.880952835083008, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.8712966442108154, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.826190948486328, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.969270706176758, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.9452006816864014, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.980231523513794, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.9498369693756104, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.070521831512451, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.1432511806488037, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.193025827407837, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06312292358803986, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06330749354005168, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.0651531930601698, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06626061277224068, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06662975267626431, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06681432262827612, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06829088224437062, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.067921742340347, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06847545219638243, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06829088224437062, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06995201181247693, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.07050572166851236, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.07327427094868956, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.0843484680693983, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.1035437430786268, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.11332595053525286, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.16020671834625322, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.20376522702104097, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.22702104097452935, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.23163528977482467, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2364341085271318, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.23791066814322628, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.24197120708748615, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2364341085271318, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.23052787006275377, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.22886674049464747, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2248062015503876, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.22111480251015134, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.21410114433370248, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.22314507198228128, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.2024732373569583, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.20782576596530086, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.20155038759689922, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.1939830195644149, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2115171650055371, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.19361387966039129, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.20801033591731266, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.18678479143595422, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.19619785898855666, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.17977113325950536, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.16223698781838317, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.18992248062015504, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.010829925744096162, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.011006156410889997, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.011850779205203374, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.01252775817515691, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.012642620234975742, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.013255291485448321, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.014948305572958273, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.015446406833232269, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.016465670690628164, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.017236316782750748, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.018246527326731935, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.018790654890312456, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.020777838181625718, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.03167131043643511, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.04659756296010383, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.05802544311521637, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.08816258922324526, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.12515923148563632, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.1447213845228444, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.15334632032954723, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.1630606075163917, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.16730565946260448, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.176160391024239, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.17324825734280666, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.1738742993642283, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.17807287271913272, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.1755253792023301, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.17448567384705624, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.1713592091331706, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.1807009691628636, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.16933879818554565, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.17196471126021903, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.162539887792576, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.1587717003614695, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.16770630774139691, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.1511928778836225, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.1558840952823203, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.14187563019057012, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.14813966640988163, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.14006825500509879, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.12467351933506421, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.12271349867744841, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 22, "lr_best": 0.00021599999999999996, "wd_best": 0.05, "train/loss_best": 2.267430124282837, "validation/loss_best": 2.541468381881714, "validation/acc_best": 0.24197120708748615, "validation/f1_best": 0.176160391024239} +{"epoch": 8, "train/lr": 0.0002612959497061927, "train/loss": 2.59423797249794, "train/grad": 0.24831966258585453, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.123233642578125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.122391357421875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.12118408203125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.119862060546875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.1187255859375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.1171142578125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.115518798828125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.11349609375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.11103759765625, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.108232421875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.10485107421875, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.09750244140625, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.0852490234375, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.0538427734375, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.0119586181640625, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.9305084228515623, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.731855926513672, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.598008728027344, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.482783737182617, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.4024211502075197, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.322579708099365, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.257496795654297, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.182827377319336, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.1270878887176514, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.0750405263900755, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.0255339097976686, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.985459942817688, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.959078493118286, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.9434342360496522, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.9395692247152327, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.9077212131023407, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.883366469144821, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.9119311845302582, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.9511834347248078, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.9634135085344315, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.9987757074832917, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.036871879696846, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.065752749443054, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.104521369934082, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.1493540620803833, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.1889291083812714, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.2648316049575805, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.171875, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02025110517628491, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020255912919528784, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02026080806273967, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02026497261133045, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.0202671459550038, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.020267908279784023, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.0202635862166062, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.020254615168087183, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.020237405635416508, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.020214896332472563, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.020198800428770484, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02024749763775617, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.020517178671434523, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.021535833794623614, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.023052385123446584, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.02506508447229862, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.0297133316565305, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03293174907565117, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.035548653798177836, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03647934482432902, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03703938235528767, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03766883303411305, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03894271216355264, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04002740893512964, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.041285916194319726, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04275756539776921, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.044034905657172205, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04531463144347072, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.046140904724597934, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04713334511965513, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.048388397991657256, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04953429754823446, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.05075735965743661, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04980365179479122, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.05151837704703212, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.05166203504428268, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04953424084931612, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.050946074873209, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.051478028297424316, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05343551754951477, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0542063719779253, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.056334843877702954, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1222574710845947, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.121878147125244, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.121295690536499, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1207354068756104, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1201789379119873, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.119408369064331, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.118544101715088, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1175756454467773, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.116109609603882, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1141536235809326, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1113388538360596, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.103823184967041, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.0901613235473633, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.0585334300994873, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.0229568481445312, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.861731767654419, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.7063512802124023, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.602837562561035, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.5556046962738037, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.5441179275512695, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.5573372840881348, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.5676474571228027, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.582871913909912, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.620361804962158, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.6376230716705322, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.639677047729492, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.6651101112365723, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.6941678524017334, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.759441614151001, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.849759817123413, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.9724936485290527, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.9477224349975586, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.9102365970611572, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.9180238246917725, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.909759044647217, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.0849456787109375, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.089167356491089, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.054791212081909, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.029608726501465, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.962933301925659, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.0360336303710938, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.9134225845336914, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06330749354005168, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06256921373200443, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06404577334809892, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06496862310815799, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06699889258028793, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.0695828719084533, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.07179771133259505, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.0695828719084533, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.07272056109265411, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.07770394979697305, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.08379475821336287, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.09745293466223699, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.1094499815430048, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.1509782207456626, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.18826135105204872, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2216685123661868, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.23274270948689554, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.24160206718346253, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.24049464747139165, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2425249169435216, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.23846437799926173, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.22812846068660023, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.22720561092654115, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.23145071982281284, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2262827611664821, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.22369878183831673, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.21686969361387967, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.21779254337393872, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.20025839793281655, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2129937246216316, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.21686969361387967, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.2054263565891473, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2100406053894426, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.20210409745293467, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.20671834625322996, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.20358065706902917, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.20007382798080472, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.19601328903654486, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.18807678110003692, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.19748984865263935, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.010707511762717966, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.010646947463938452, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.012262890466493842, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.013581960959850476, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.014725795880209114, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.016377534119851003, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.019122239110916026, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.020939802094018703, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.02320799618200089, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.02507488975621858, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.02770605927036725, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.033525414775294006, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.03991060530700229, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.051148711099799536, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.06239849038507093, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.09263145873280515, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.12592815437783705, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.15470757223911658, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.16457284364839578, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.17397382744446485, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.17433567824279295, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.17815545902010818, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.17454511988119026, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.16612452538872202, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.16793237687589613, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.17782182678438127, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.18059055142113714, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.18005437724154136, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.16886991416854877, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.1584435874154622, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.1479586883816978, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.15664123996997648, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.1645043795208258, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.14718845143497408, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.15946070936490084, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.14358761516852026, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.1429003864348344, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.14924070902839778, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.14470827510369436, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.1519301769341909, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.13597723758008573, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1481908853282179, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 21, "lr_best": 0.00018299999999999998, "wd_best": 0.05, "train/loss_best": 2.257496795654297, "validation/loss_best": 2.5676474571228027, "validation/acc_best": 0.2425249169435216, "validation/f1_best": 0.17815545902010818} +{"epoch": 9, "train/lr": 0.00023803200426111704, "train/loss": 2.552751398086548, "train/grad": 0.25225863702595236, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.121334228515625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.120391845703125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.11884033203125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.117391357421875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.116016845703125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.11418701171875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.11220703125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.10991943359375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.106690673828125, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.10265625, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.0975146484375, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.0847735595703125, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.06345703125, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.0178009033203126, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.94989501953125, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.759537353515625, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.626070098876953, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.517108612060547, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.408638916015625, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.339110984802246, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.2686894226074217, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.2022850799560545, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.1253701877593993, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.06662926197052, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.0134055042266845, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.956785581111908, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.910691043138504, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.8794303351640702, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.8682306855916977, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.8798427498340606, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.8422540593147279, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.811015292406082, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.8285724860429764, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.8721283519268035, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.8832914406061172, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.9204683405160905, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.9805014085769654, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.010120021700859, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.065055701136589, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.119345133304596, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.1714406532049177, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.2026009303331375, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.171875, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.0201426051184535, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020143913999199868, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020148499649949372, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020147371287457645, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.020145185366272925, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.020141606908291577, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.020131966238841413, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.020119968582876027, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.020100827640853823, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.020089015732519328, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.020113573209382594, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.020354078570380806, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02101973321288824, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.022756431121379136, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.024774283953011036, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.028833572128787635, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03198872836306691, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03435749225318432, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03658918580971658, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.037164516150951385, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.037580810710787775, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.038532863929867746, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.039851054958999155, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.040838489029556516, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04207401914522052, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04334472173824906, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04449749728664756, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04571088645607233, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04709267692640424, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04867149107158184, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.050055369436740875, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.05045879557728768, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.05009061332792044, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.049555848184973, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.05078848928213119, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.05115086015313864, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05018084591254592, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05134265024214983, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.053155030310153964, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.054988651052117345, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05706335788592696, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05500428173691034, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1216790676116943, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.121236801147461, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.120575428009033, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.119950294494629, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.119342565536499, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1185731887817383, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.117614269256592, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.116464376449585, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.114572525024414, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1115942001342773, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1068811416625977, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.093822479248047, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.072416067123413, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.0346500873565674, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.927504777908325, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.7606215476989746, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.6431455612182617, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.575773000717163, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.526151180267334, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.5184760093688965, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.534081220626831, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.54522967338562, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.5599539279937744, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.5998289585113525, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.6374497413635254, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.678934335708618, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.7020890712738037, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.712874412536621, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.7435014247894287, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.7943918704986572, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.9134202003479004, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.901340961456299, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.860769271850586, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.886552572250366, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.98442006111145, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.9726972579956055, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.9547672271728516, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.9451353549957275, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.912123918533325, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.902170419692993, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.9854936599731445, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.987858533859253, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06386120339608711, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.0664451827242525, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06736803248431156, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06884459210040605, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06921373200442968, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.07032115171650055, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.07124400147655961, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.07161314138058324, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.07198228128460686, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.08231819859726837, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.0932078257659653, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.10428202288667406, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.12661498708010335, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.16352897748246586, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.20191952750092285, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2249907715023994, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.24492432631967515, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.24916943521594684, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.24787744555186417, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.24234034699150978, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.24307862679955702, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2364341085271318, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.23292727943890734, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.22978959025470652, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.22259136212624583, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.22517534145441123, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.21650055370985605, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.21779254337393872, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.19952011812476927, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.20948689553340716, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.20893318567737174, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.20911775562938353, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.20025839793281655, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.19232188999630861, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.19896640826873385, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.1893687707641196, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.19804355850867478, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.19509043927648578, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.18881506090808417, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.17811000369139904, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.010379333792164819, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.011229401197364793, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.01215368128677109, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.012654976824552263, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.013064294204247429, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.015304737581267302, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.017085486063623926, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.019478945889860603, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.02174610549292551, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.02318711257323325, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.02690436510624372, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.036759828625288864, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.046525011145247853, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.05296648523330335, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.06893875121499463, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.09964966465287532, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.12955772261726714, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.14906282105035937, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.16783607856627103, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.1734441928329378, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.17760838785022517, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.17601759436572725, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.18577721019015458, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.18582265501237838, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.18418103150804285, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.18822761748714126, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.18566261564217681, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.19147754738506162, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.18509412341375872, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.1889822640128651, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.17287062977816933, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.17604958785129102, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.17929817600535083, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.1663359780274739, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.1564831405825986, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.15929376242851812, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.15946656974308313, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.1553598504409854, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.1524288875930927, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.1520376295259199, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.13930717467092466, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1430166430515166, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 19, "lr_best": 0.00013199999999999998, "wd_best": 0.05, "train/loss_best": 2.339110984802246, "validation/loss_best": 2.5184760093688965, "validation/acc_best": 0.24916943521594684, "validation/f1_best": 0.1734441928329378} +{"epoch": 10, "train/lr": 0.00021091808096876188, "train/loss": 2.501012989282608, "train/grad": 0.25196973226964475, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.121038818359375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.12003173828125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.118486328125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.11697998046875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.1154833984375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.113551025390625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.111444091796875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.1089013671875, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.105316162109375, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.100189208984375, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.092806396484375, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.07385498046875, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.0441796875, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.98916259765625, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.8173757934570314, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.6823965454101564, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.5631134033203127, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.4614276123046874, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.3506117248535157, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.284020938873291, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.2124181270599363, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.1437119674682616, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.0621157836914064, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.9945225191116334, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.9357875299453735, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.8676233530044555, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.8174969363212585, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.7932428479194642, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.7720601403713225, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.7658052796125412, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.7327666592597961, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.7091439378261566, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.7306989175081253, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.7775862342119217, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.7931976735591888, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.829988916516304, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.8862810391187668, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.910126959681511, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.9636655449867249, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.9942372953891754, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.0617722207307816, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.101892141699791, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.171875, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.019859990938566625, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.019861384369432925, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.019862255197949708, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.019861918580718337, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.01985962426289916, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.019854010292328896, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.019844782752916217, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.019833937687799334, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.019822035916149616, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.019841611539013683, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.01995099676772952, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.020490761161781848, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02156602549366653, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.0237681816983968, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.027711809519678353, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.030854133386164904, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.033428703928366305, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.035653459476307034, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03759595764800906, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03791429738514125, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.038161303978413344, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03895694768056274, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.04015877751633525, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04105879371985793, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04232834257185459, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04355472328141332, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04490480680018663, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.046249492149800064, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04704478297382593, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04768750919029117, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.049856386706233026, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.05063957780599594, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.05084100112318993, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.05085022723302245, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.051609859876334666, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.05200887799263001, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05095195211470127, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05062578013166785, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05102278739213943, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05160217866301536, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05400628382340074, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05301735645160079, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1214888095855713, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1210973262786865, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.120493173599243, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.119925022125244, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1193060874938965, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1185359954833984, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1175191402435303, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.116124153137207, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1136398315429688, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1093263626098633, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1021745204925537, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.08274507522583, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.055055618286133, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.010376214981079, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.8000338077545166, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.693737030029297, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.5977976322174072, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.5457794666290283, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.5128908157348633, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.5148706436157227, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.5361130237579346, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.551295042037964, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.577324628829956, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.6271512508392334, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.660642623901367, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.6769111156463623, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.7304463386535645, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.7765491008758545, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.851364850997925, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.9268763065338135, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.0276451110839844, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.014186382293701, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.9812543392181396, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.9930100440979004, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.9456467628479004, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.0124988555908203, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.9681241512298584, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.939150333404541, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.9620437622070312, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.972923994064331, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.0353357791900635, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.913602352142334, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06349206349206349, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06496862310815799, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06626061277224068, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.0636766334440753, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06552233296419344, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06736803248431156, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.07142857142857142, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.07770394979697305, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.08914728682170543, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.10243632336655592, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.11018826135105204, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.162421557770395, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.1921373200442968, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.21853082318198597, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.23385012919896642, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.24621631598375784, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.24806201550387597, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.24012550756736803, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2368032484311554, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2351421188630491, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.22702104097452935, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2262827611664821, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.22757475083056478, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2220376522702104, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.21557770394979697, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.20837947582133629, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.20911775562938353, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.19896640826873385, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.20634920634920634, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.20930232558139536, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.20468807678110004, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.20782576596530086, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.20321151716500555, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.21207087486157253, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2102251753414544, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2039497969730528, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.19712070874861573, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.19748984865263935, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.2074566260612772, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.012198214487796864, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.013986109675936503, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.015045273438900489, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.015312211894269076, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.015656604394850376, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.01577903095673726, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.017103362327663896, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.01857677470725978, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.02112287537317767, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.02579418261431171, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.030948304879615432, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.04092929703500992, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.05321396499596872, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.06107776501351652, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.09829643849574332, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.12356176082364224, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.14372139269378162, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.15723260564605748, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.17017876982641922, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.17617514178120852, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.17257391048843437, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.17492575637604582, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.17989604955699467, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.17637164650683376, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.17984025609765866, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.18716147089012578, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.1834512012319601, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.1754144951459958, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.16590946296873568, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.16517006022423272, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.15546854789439732, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.16302974922200347, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.1675477403729065, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.15603682857776036, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.16294405643001167, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.16085535962841704, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.16173279870263194, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.1639400874456132, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.15674736726873276, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.15094316111409886, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.1503681336847833, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.16419330867054344, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 19, "lr_best": 0.00013199999999999998, "wd_best": 0.05, "train/loss_best": 2.284020938873291, "validation/loss_best": 2.5148706436157227, "validation/acc_best": 0.24806201550387597, "validation/f1_best": 0.17617514178120852} +{"epoch": 11, "train/lr": 0.00018113997589164516, "train/loss": 2.4670093381404876, "train/grad": 0.2581158122420311, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.125076904296875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.123909912109375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.12219482421875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.120596923828125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.11893310546875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.1168798828125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.114427490234375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.11148681640625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.10699462890625, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.100091552734375, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.0894384765625, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.0625830078125, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.02494140625, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.9507489013671875, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.7408111572265623, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.6329745483398437, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.532059097290039, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.4227826690673826, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.3070349311828613, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.2411373901367186, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.168440580368042, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.093950572013855, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.010775017738342, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.945951759815216, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.884261074066162, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.8228216874599457, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.772086330652237, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.7433328187465669, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.7181972175836564, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.7078615945577622, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.672171567082405, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.645528082847595, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.6581109565496446, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.7150718080997467, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.7186530286073685, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.7478610664606093, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.8170256155729294, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.8473041015863418, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.8892482668161392, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.9359164160490037, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.9705446296930313, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.030116837620735, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.171875, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.019962686109356583, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.01996223222464323, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01996226126793772, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.019961515883915128, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019959719451144338, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.019955869279801846, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.01995038095396012, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.0199446165189147, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.019950383747927843, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.020020918715745212, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.020253083510324357, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.021115011386573315, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02258522419258952, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.025038684904575347, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.0298648586217314, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.032508445773273706, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03452942145988345, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.0370675048045814, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03874769953079522, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.039019305733963845, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03938079030252993, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.040377950370311735, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.04174479762092233, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.042872224859893324, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04433656454086304, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04607406530529261, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04739225775003433, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04855260904878378, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04947130050510168, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04981177588924766, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.051445465423166754, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.052536289989948276, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.052246898561716076, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0517101538926363, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.05191943574696779, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.05183714035898447, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05085175920277834, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05116040157154202, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.051337738782167436, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05218401385471225, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05339613907039165, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05233803927898407, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.121366262435913, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1208853721618652, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1201255321502686, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1193835735321045, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.118626594543457, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1175732612609863, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.116255521774292, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.114483118057251, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1111576557159424, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.105330228805542, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.09543776512146, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.0706896781921387, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.040583848953247, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.950517177581787, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.756263017654419, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.6523752212524414, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.591738224029541, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.5432326793670654, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.518733501434326, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.525949716567993, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.549865961074829, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.5674490928649902, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.5903518199920654, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.6349775791168213, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.6755425930023193, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.7176685333251953, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.7721664905548096, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.799710512161255, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.845912218093872, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.912809371948242, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.040506362915039, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.0003716945648193, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.0514142513275146, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.044426202774048, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.975353479385376, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.031569719314575, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.963775634765625, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.8913981914520264, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.8597471714019775, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.8555333614349365, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.898205280303955, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.8291594982147217, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.0664451827242525, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06681432262827612, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06718346253229975, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06718346253229975, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06829088224437062, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06755260243632337, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06847545219638243, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06866002214839424, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.07401255075673681, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.08065706902916205, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.09542266519010704, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.10557401255075674, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.12993724621631597, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.1714654854189738, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2069029162052418, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.22425249169435216, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.23717238833517904, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.24455518641565152, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.24160206718346253, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.23883351790328536, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.23791066814322628, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2355112587670727, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.22517534145441123, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.22960502030269472, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.22406792174234036, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.21705426356589147, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2157622739018088, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2146548541897379, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.2129937246216316, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.19804355850867478, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.21354743447766703, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.21096345514950166, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.20782576596530086, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.21096345514950166, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.20801033591731266, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.21059431524547803, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2117017349575489, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.21613141380583242, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.21262458471760798, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.21262458471760798, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.21225544481358435, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.013034151768959371, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.01379162406949984, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.014547363797070494, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.015085384813708648, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.015306986941857463, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.016063594406056012, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.01656501048188786, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.018564557683235065, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.021568127200491893, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.02784923462778538, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.035067063040783344, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.04596032141723919, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.053479403795045045, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.07008787459618813, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.10142417744523709, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.13275574206755367, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.14892616061867536, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.16186234742819983, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.17346061112844255, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.1750583539560194, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.17406740092005404, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.1756792082612999, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.1814513582490068, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.17827598520900448, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.18798454826801947, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.18493636585366566, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.17920212839479985, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.18195907513478984, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.17662876227223703, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.17646393345456887, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.16139232072237095, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.17506338708968094, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.16936486168697482, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.1616352017728521, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.17084534034137, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.16044197112523728, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.1669798456118984, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.1733119059335909, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.1752111743544218, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.17595891201704206, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.17212709712058652, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1740596251911125, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 18, "lr_best": 0.00011399999999999999, "wd_best": 0.05, "train/loss_best": 2.3070349311828613, "validation/loss_best": 2.518733501434326, "validation/acc_best": 0.24455518641565152, "validation/f1_best": 0.17346061112844255} +{"epoch": 12, "train/lr": 0.0001499999999999999, "train/loss": 2.417224261760712, "train/grad": 0.25482960291206835, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.11765380859375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.116541748046875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.1146826171875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.112906494140625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.11116943359375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.10877197265625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.1059326171875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.102413330078125, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.096617431640625, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.0873779296875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.0730938720703125, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.03864501953125, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.9945654296875, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.853918151855469, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.6831320190429686, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.57487174987793, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.485157470703125, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.371614360809326, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.2641991806030273, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.2037288475036623, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.1291856956481934, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.053690013885498, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.9683505201339722, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.8978822922706604, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.8306085252761841, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.7577129817008972, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.701451370716095, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.6644240021705627, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.631193345785141, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.6192195135354996, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.5687125474214554, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.537535702586174, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.5550565671920777, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.6051052182912826, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.6091361570358276, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.638855140209198, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.7124854248762131, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.7474403220415116, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.7912263149023055, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.8321679854393005, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.8604543328285217, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.9119790303707123, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.171875, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.019938659514300527, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.019938975744880735, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.019940660730935634, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.019940115800127386, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019938759859651326, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.019933025622740387, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.01992653888184577, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.019923143768683076, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.019941716571338475, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02006740973331034, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.020420381505973636, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02157035646494478, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.023384061018005012, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02709552208893001, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.031150418194010855, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03383135902695358, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03555160623975098, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03819533471018076, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03938768908381462, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03943690959364176, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03959787303581834, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.04046625385060906, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.041727122664451596, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04255734721198678, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04367823975160718, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04505727434530854, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04635973358526826, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04717029932886362, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04816154295578599, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.049054364170879126, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.05048394748941064, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.050898048616945746, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.05131982333958149, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0509784622117877, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.05110054789111018, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.05099681742489338, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.049397768694907425, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.049424871131777764, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.04974421363323927, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.050905773658305405, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05168204674497247, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05032962704077363, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1214280128479004, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1209769248962402, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.120227813720703, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1194779872894287, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1187331676483154, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1176555156707764, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.116182565689087, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1141536235809326, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1101176738739014, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1026828289031982, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.0901825428009033, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.0613749027252197, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.0303080081939697, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.868953227996826, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.728576183319092, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.6324899196624756, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.587463140487671, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.5525331497192383, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.543656587600708, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.5542237758636475, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.578248977661133, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.597633123397827, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.6315319538116455, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.6790003776550293, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.7195544242858887, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.7712204456329346, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.8426573276519775, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.87998366355896, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.967355489730835, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.070725679397583, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.113509178161621, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.122206211090088, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.1472811698913574, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.105337619781494, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.056509494781494, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.1104671955108643, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.0472278594970703, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.0286264419555664, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.964708089828491, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.0265305042266846, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.0908384323120117, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.979954242706299, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06330749354005168, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06626061277224068, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.0664451827242525, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06681432262827612, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.07032115171650055, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.07069029162052418, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.07475083056478406, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.08084163898117387, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.09616094499815431, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.10834256183093392, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.14544112218530822, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.17497231450719822, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.20191952750092285, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.21373200442967885, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2334809892949428, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.24031007751937986, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.23624953857511996, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2349575489110373, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2368032484311554, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2307124400147656, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.22296050203026946, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.22572905131044665, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.22296050203026946, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2159468438538206, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.21945367294204504, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.21077888519748986, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.21225544481358435, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.20044296788482835, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.21096345514950166, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.21410114433370248, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.2085640457733481, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2159468438538206, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.20191952750092285, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.21244001476559615, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.21483942414174972, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.20376522702104097, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.20431893687707642, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.1998892580287929, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.21225544481358435, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.01096461095509041, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.011791721134137699, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.012935928966849126, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.013358498465159255, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.013680946313008209, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.01459496351361051, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.014563380120748601, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.01754377663830238, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.020047930351686123, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.02658075116929279, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.03467342317074804, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.04509761006246397, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.05523881116045606, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.07852143430175845, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.10285554749887027, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.127332241342505, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.13926983011150332, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.16123220191844254, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.17226930563503062, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.17119918465763942, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.17354973595630785, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.17746239638865366, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.1741380382274075, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.16825879490946657, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.1717053673668828, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.17123121822636608, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.16333956610831976, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.16783967877669767, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.15917442599106454, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.16078697628598826, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.15610241030507857, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.15942189824730188, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.16210545337972782, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.15837539641324527, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.16343018831869105, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.1510563976647593, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.16728362922149861, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.1708873906761924, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.15683016579246542, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.15716796539901776, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.15297706943773395, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.15774252892800647, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 18, "lr_best": 0.00011399999999999999, "wd_best": 0.05, "train/loss_best": 2.2641991806030273, "validation/loss_best": 2.543656587600708, "validation/acc_best": 0.24031007751937986, "validation/f1_best": 0.17226930563503062} +{"epoch": 13, "train/lr": 0.00011886002410835478, "train/loss": 2.382471523284912, "train/grad": 0.2573080117255449, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.12145751953125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.120269775390625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.11830810546875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.11642578125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.114678955078125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.112203369140625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.10927978515625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.1056591796875, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.099429931640625, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.08916748046875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.0729241943359376, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.0347857666015625, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.98882080078125, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.799172058105469, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.657889862060547, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.548520736694336, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.45952392578125, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.343032188415527, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.2394439125061036, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.176235332489014, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.100908703804016, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.023460874557495, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.9335901355743408, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.8603441846370696, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.789080842733383, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.7106229853630066, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.6489960855245591, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.607719589471817, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.5709582394361497, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.548896376490593, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.4928598654270173, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.4589627850055695, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.4648994052410125, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.522099294066429, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.5337224805355072, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.5638381844758988, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.6251752388477325, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.6527731317281722, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.6967520594596863, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.7254335922002793, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.758554643392563, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.8211042523384093, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.171875, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020278858933597804, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020279352306388318, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020278006922453642, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020276331203058363, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.020273141264915466, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.020266929618082942, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02025931605603546, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.020258564478717746, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.020296388897113504, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02048500267788768, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.020958240758627653, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.0223502351436764, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02434952509589493, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02891324631869793, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03220239499583841, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.034772851541638373, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03641705355606973, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.038989558685570955, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.039933505058288576, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.0399540644325316, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.04008260689675808, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.04078043082728982, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.04203652806580067, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04305531524121761, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.044302507769316435, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04586617488414049, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04707731647416949, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04777783332392573, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04865990659222007, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.049217526018619534, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.05059584029018879, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.051729713939130305, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.05211272969841957, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.051702560037374494, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.052374550122767684, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.051773340441286564, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04965353289619088, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05030870079994201, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.04922063659876585, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.049621857032179834, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05030469177290797, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.049257866423577074, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.121258020401001, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1207058429718018, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1198198795318604, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.118896245956421, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1179702281951904, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1165771484375, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.114776849746704, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.112234115600586, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.107205629348755, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.0979421138763428, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.0829198360443115, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.0510830879211426, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.0197222232818604, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.8208277225494385, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.7006430625915527, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.6121532917022705, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.566985607147217, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.525660753250122, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.5182406902313232, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.531365394592285, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.56042742729187, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.5809364318847656, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.610124349594116, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.662928342819214, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.706289291381836, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.758467197418213, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.824373483657837, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.865652084350586, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.949479818344116, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.0244343280792236, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.1707956790924072, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.1901309490203857, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.2607173919677734, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.2664382457733154, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.223937749862671, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.2400434017181396, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.197641611099243, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.1203482151031494, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.0560126304626465, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.1648170948028564, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.1885552406311035, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.040229082107544, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06736803248431156, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.0681063122923588, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06773717238833518, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.067921742340347, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06829088224437062, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.067921742340347, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06829088224437062, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.07235142118863049, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.07862679955703211, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.08527131782945736, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.10335917312661498, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.1094499815430048, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.15411590992986343, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.19324473975636766, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.21188630490956073, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.22868217054263565, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.24344776670358065, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2440014765596161, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.24326319675156885, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2369878183831672, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2382798080472499, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2349575489110373, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.22978959025470652, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2262827611664821, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2220376522702104, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.21945367294204504, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.21410114433370248, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.21096345514950166, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.21133259505352528, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.1969361387966039, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.19915097822074565, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.1982281284606866, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.1921373200442968, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.19176818014027316, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.19121447028423771, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.19952011812476927, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2074566260612772, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2009966777408638, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.1984126984126984, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.19582871908453303, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.2085640457733481, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.01312440138503565, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.01381605584144227, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.015019202539359545, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.015560626862808553, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.0163814633953614, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.017293131640056667, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.018556449739623903, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.020237196222084, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.026604056542933404, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.03267403089205658, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.038907154441214044, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.050467801209780505, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.05577996929635979, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.08714038415184723, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.119641081645094, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.13922297588574836, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.15619216051573978, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.1701500096337799, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.1770552510209039, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.17906670033811664, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.17494347927348708, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.17836155021080025, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.1804496027355449, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.17968631456723583, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.1812179360260885, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.18300663219893498, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.18280780076932282, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.17714672002794732, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.17274765739416922, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.17179070405619123, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.16099078865867442, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.16105724024759396, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.16019574455325247, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.15231445790035933, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.1560051037333862, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.14878382319902048, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.1599699419971546, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.15680641359135425, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.15740161971391425, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.15262314558410997, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.1473590587157391, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.15909198762387597, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 18, "lr_best": 0.00011399999999999999, "wd_best": 0.05, "train/loss_best": 2.2394439125061036, "validation/loss_best": 2.5182406902313232, "validation/acc_best": 0.2440014765596161, "validation/f1_best": 0.1770552510209039} +{"epoch": 14, "train/lr": 8.9081919031238e-05, "train/loss": 2.342321789264679, "train/grad": 0.25617745354771615, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.11876953125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.117413330078125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.11515380859375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.113001708984375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.11094970703125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.10812744140625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.10475341796875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.10047119140625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.092760009765625, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.0801605224609374, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.0601959228515625, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.0159808349609376, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.9665341186523437, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.7493663024902344, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.6186962127685547, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.5170980072021485, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.4300975799560547, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.3119046783447263, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.210197525024414, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.147417593002319, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.066907091140747, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.985943350791931, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.8939926707744599, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.819654995203018, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.7475215691328048, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.6641804534196853, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.5993437600135803, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.5576954281330109, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.5138086593151092, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.4840830045938491, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.4194021171331406, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.3783212155103683, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.3780640602111816, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.4211560028791428, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.4262126928567886, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.4509522306919098, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.5223053812980651, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.553672432899475, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.5952723670005797, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.6278783345222474, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.6531536054611207, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.7220738589763642, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.171875, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.01988103332463652, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.019881386137567462, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.0198802540730685, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.019877686412073672, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.01987564068287611, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.01987115163821727, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.019866967969574035, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.019874628502875566, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.019941801424138247, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02019806096330285, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.020786662064492704, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02242218155413866, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.024585372125729917, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02966012337245047, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03275641238316893, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03513255028985441, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.036740380842238664, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03917747602798045, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03988041169941425, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.039882313292473555, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.04009871056303382, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.04087454358115792, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.04217412577942014, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.043226884435862305, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04452665897086263, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04595671532675624, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04718876503407955, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0480004052631557, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.048835851363837716, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.049469236303120855, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.05070138553157449, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.05123046787455678, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.05096384879201651, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.050561702754348514, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.05091743463650346, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.05065574161708355, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04968187624588609, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04961844481527805, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.04929793067276478, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0498325264453888, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05008092457428574, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04854837732389569, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1207616329193115, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.120163679122925, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.119173526763916, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1182050704956055, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1172170639038086, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1157567501068115, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1138548851013184, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1110780239105225, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1055192947387695, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.095087766647339, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.0783283710479736, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.044238328933716, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.010845184326172, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.793625831604004, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.6764910221099854, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.6012752056121826, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.5578205585479736, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.520028829574585, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.517778158187866, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.5342252254486084, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.5640807151794434, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.5878870487213135, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.6207685470581055, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.6785836219787598, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.735710620880127, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.7944960594177246, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.866157293319702, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.904012441635132, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.97943115234375, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.0897932052612305, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.212688684463501, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.2006332874298096, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.2790770530700684, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.2537999153137207, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.2445924282073975, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.241471529006958, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.193491220474243, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.1469738483428955, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.108160972595215, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.1343536376953125, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.211801528930664, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.1263418197631836, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06552233296419344, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.0664451827242525, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06866002214839424, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06736803248431156, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06829088224437062, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06829088224437062, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06921373200442968, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.07124400147655961, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.07475083056478406, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.08084163898117387, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.08970099667774087, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.10705057216685124, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.11277224067921743, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.16315983757844224, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.1998892580287929, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.21982281284606867, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.23052787006275377, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.24806201550387597, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.24713916574381692, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.24363233665559247, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.23717238833517904, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.23901808785529716, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.23588039867109634, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2277593207825766, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.22609819121447028, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.21834625322997417, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.21502399409376152, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.21225544481358435, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.20487264673311184, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.20782576596530086, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.18992248062015504, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.20173495754891105, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.19361387966039129, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.19121447028423771, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.1925064599483204, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.18807678110003692, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.19804355850867478, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.19656699889258028, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.19158361018826134, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.18973791066814322, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.18401624215577703, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.19859726836471023, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.011883718140670211, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.012551288040967785, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.013826596644692856, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.013756626297134788, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.014776829221968019, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.015812144117019658, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.01747089150941847, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.019650222471916095, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.025949334693262494, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.03268967563584597, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.04161811607406179, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.05357587530595024, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.05860679381335263, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.09402335743640937, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.12309735055691913, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.14323495133620623, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.1548987056860529, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.1730019336095642, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.17682543337182158, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.1787746383062755, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.17576450708913116, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.18106272070530274, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.18368071236416736, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.18072361871206374, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.18011262150398558, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.17706306775105887, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.17541466894290358, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.17606372792367453, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.16926905787915017, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.17335692756576856, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.16345619217865656, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.17098395127027613, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.16047635708916655, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.1600267896094764, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.1633497812792475, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.15904273948278522, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.16809920732937514, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.16309372387286214, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.16289110076539537, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.1533703375195014, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.15581533667474518, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1617184782702394, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 17, "lr_best": 9.599999999999999e-05, "wd_best": 0.05, "train/loss_best": 2.3119046783447263, "validation/loss_best": 2.520028829574585, "validation/acc_best": 0.24806201550387597, "validation/f1_best": 0.1730019336095642} +{"epoch": 15, "train/lr": 6.196799573888289e-05, "train/loss": 2.3132343697547912, "train/grad": 0.2543561752140522, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.1145458984375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.113189697265625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.11101806640625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.1088330078125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.107034912109375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.104107666015625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.100738525390625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.096295166015625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.088494873046875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.074869384765625, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.0536712646484374, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.00820068359375, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.958735046386719, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.731935729980469, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.603969955444336, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.510679244995117, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.423056793212891, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.300694675445557, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.1980391693115235, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.1350052070617678, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.0571876335144044, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.9744017934799194, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.8752619814872742, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.796047681570053, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.7223223042488098, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.6334549796581268, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.5634059238433837, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.5229841101169586, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.4733002972602844, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.4342267990112305, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.3581275671720505, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.31072585105896, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.2978502583503724, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.3341335809230805, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.333360996246338, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.35960626244545, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.4273217850923539, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.4649278527498246, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.515215734243393, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.5470305979251862, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.5688719153404236, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.6324803149700164, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.171875, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020287199621088804, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020286639044061302, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020285386852920056, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020281792981550098, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.0202768295397982, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02026979426853359, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02026259164791554, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02026862571015954, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.020343255340121685, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.020635381331667303, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.021282058991491794, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.023048039553686978, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.025243308637291195, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.03057260627858341, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03364395727403462, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.035813176073133945, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03742280601523817, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.0398011336941272, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.040404644794762135, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.04035365551710129, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.04051245769485831, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.041260413508862254, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.04242830319330096, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04339629538357258, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0443954904936254, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04553581891581416, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.046483067907392976, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.046914413124322894, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.047406673021614554, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04796792451292276, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04848300000652671, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04891269017010927, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04911393020302057, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04896743446588516, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04963042300194502, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0498884710855782, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04944184834137559, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.049339991547167304, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.048864108435809614, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.050083661060780285, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05048742858693003, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.048156612142920496, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1207613945007324, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1201834678649902, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1191978454589844, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1182732582092285, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1173157691955566, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.115861654281616, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1139423847198486, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1110360622406006, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1050808429718018, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.093841314315796, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.0759551525115967, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.0412065982818604, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.007516860961914, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.7826812267303467, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.665703535079956, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.5977702140808105, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.5546975135803223, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.5184996128082275, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.5180470943450928, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.534238338470459, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.565574884414673, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.5918946266174316, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.6275253295898438, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.6823604106903076, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.7323195934295654, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.7902467250823975, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.8612332344055176, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.9066736698150635, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.9848227500915527, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.0994162559509277, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.208136796951294, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.198354959487915, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.2519149780273438, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.263972043991089, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.2248635292053223, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.223844528198242, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.209996223449707, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.1951956748962402, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.145010232925415, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.168748140335083, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.1869308948516846, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.0813159942626953, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.067921742340347, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06902916205241787, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.067921742340347, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06755260243632337, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06829088224437062, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06976744186046512, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.07087486157253599, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.07327427094868956, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.08305647840531562, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.09431524547803617, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.10871170173495755, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.11443337024732374, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.16463639719453674, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.20210409745293467, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.21908453303802142, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.23385012919896642, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2499077150239941, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2456626061277224, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.24437061646363972, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.24012550756736803, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.24197120708748615, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.23994093761535623, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.22739018087855298, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.22609819121447028, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.21982281284606867, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.21336286452565523, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.21207087486157253, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.20616463639719454, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.20505721668512367, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.1891842008121078, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.20136581764488742, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.19306016980435586, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.1877076411960133, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.1934293097083795, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.18863049095607234, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.18660022148394242, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.19029162052417867, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.18549280177187155, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.18290882244370615, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.18586194167589518, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.19287559985234404, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.012299965732155567, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.01372211686752464, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.01492687941207121, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.015274589172657449, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.016185892853526347, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.017230417810212163, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.01914557706031827, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.021584375416843094, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.02673723475059445, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.0365340644670559, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.045223890402077166, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.055739196224096156, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.06103220323233433, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.09804039529713038, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.12876363538314714, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.143738490478154, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.15853399463979043, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.17397902608202434, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.17606759136421338, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.1777906987722738, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.1756876651517859, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.18058151603012337, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.1835518861469804, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.17653130569356867, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.18076395543105384, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.1790863195075211, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.17715798451992973, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.17763725868876187, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.17229994821515307, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.1706640911827931, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.1614951010548382, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.17281785108451708, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.1669981150407626, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.16358845013945156, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.17034149147285405, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.17028863986318846, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.1677267960911274, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.16934726546186396, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.16181798766178038, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.16040636514574083, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.16730961017233428, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1663475997996535, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 17, "lr_best": 9.599999999999999e-05, "wd_best": 0.05, "train/loss_best": 2.300694675445557, "validation/loss_best": 2.5184996128082275, "validation/acc_best": 0.2499077150239941, "validation/f1_best": 0.17397902608202434} +{"epoch": 16, "train/lr": 3.870405029380728e-05, "train/loss": 2.2902757287025453, "train/grad": 0.25033239215612413, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.122379150390625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.1209375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.118740234375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.116585693359375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.11459228515625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.111485595703125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.108001708984375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.103299560546875, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.094803466796875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.080006103515625, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.05706298828125, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.0091802978515627, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.95800048828125, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.7307365417480467, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.603905258178711, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.512107391357422, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.42222110748291, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.298047275543213, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.1959054851531983, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.1306621265411376, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.0480070686340333, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.9628879642486572, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.8640476083755493, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.7807774424552918, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.7030836689472197, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.609250327348709, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.5395707511901855, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.4961347717046738, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.4425945246219636, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.4054060083627702, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.3180635267496108, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.2704013562202454, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.2510594838857652, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.27692098736763, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.2713215732574463, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.284920011162758, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.3396173083782197, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.369284639954567, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.4150377798080445, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.419497513771057, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.4366391587257386, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.5072039544582367, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.171875, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020242783646099268, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02024192420300096, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020239371913485227, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020235441508702934, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02022798242047429, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.020221592714078723, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02021666944492608, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02022521125152707, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02031617396045476, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.020646887496113778, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.021355760456062852, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02325413841754198, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.025475963260978462, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.030874000238254665, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03392598968930542, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03595517112873495, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03755425387062132, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.0398478136304766, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.04037184638902545, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.040258569568395616, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.04046143746003508, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.041119435355067256, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.04226528782397509, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04303589591756463, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04393916103988886, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04497330589219928, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.045882924478501085, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04654303593561053, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.047077379524707794, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04750667566433549, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.047868019528687, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04793514212593436, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04792665267363191, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04809899747371674, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.048727310691028834, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04838472031056881, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04783736817538738, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04754645897075534, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.047194720320403574, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.04757992772385478, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.048221084251999856, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04601682934910059, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1207311153411865, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1201560497283936, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1191766262054443, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1182286739349365, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1172678470611572, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1157779693603516, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.113790512084961, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1108205318450928, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1045308113098145, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.0926342010498047, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.0739989280700684, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.038588523864746, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.004211664199829, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.7748100757598877, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.658221960067749, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.5950655937194824, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.5525588989257812, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.5196712017059326, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.523315906524658, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.5427913665771484, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.576469659805298, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.604064464569092, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.6415953636169434, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.7024056911468506, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.756556272506714, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.821753740310669, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.9000768661499023, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.941950559616089, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.0272414684295654, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.157399892807007, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.259739398956299, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.252049446105957, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.3237617015838623, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.3450474739074707, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.3101096153259277, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.3255691528320312, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.3052027225494385, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.2205898761749268, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.184666872024536, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.1941888332366943, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.210505247116089, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.1065328121185303, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06552233296419344, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06662975267626431, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06699889258028793, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06718346253229975, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06718346253229975, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06866002214839424, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.07069029162052418, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.07419712070874862, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.08176448874123293, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.09339239571797711, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.10741971207087486, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.11498708010335917, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.1674049464747139, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.20653377630121816, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.21834625322997417, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2368032484311554, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.24473975636766335, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.24640088593576967, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.24473975636766335, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.23569582871908454, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2369878183831672, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.23458840900701367, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.22702104097452935, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2248062015503876, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.21686969361387967, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.21114802510151348, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.20911775562938353, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.20321151716500555, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.2054263565891473, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.18826135105204872, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2024732373569583, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.19490586932447398, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.18586194167589518, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.1925064599483204, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.18456995201181248, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.1878922111480251, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.19324473975636766, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.19195275009228496, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.19102990033222592, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.18660022148394242, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.19490586932447398, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.012701562587534456, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.013238333541289263, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.014078014016360487, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.014882405107245603, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.015604504232065067, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.016258409075399077, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.01829812807436806, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.02072906299457053, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.02674025586433763, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.03488021838284117, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.04503873489546881, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.055886674265173304, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.062411183781023526, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.09967134915924679, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.13416636003209684, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.14616449346382196, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.1640264690446522, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.17272117643140747, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.17857435702432056, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.17961490957728143, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.17429994851737765, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.17873511589906377, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.18146883381402387, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.17931913347019887, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.18117036101571835, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.17863613131623737, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.17441374315044045, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.1739669973833442, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.16662967135915066, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.1679652530495439, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.15795246512090425, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.16778254170162246, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.16104572388931848, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.15477965958770737, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.16241659688930668, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.15695397189337293, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.16109453371558677, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.16231036477029126, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.16333814686538603, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.16323419359854074, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.16350569791991135, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1655226848984512, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 18, "lr_best": 0.00011399999999999999, "wd_best": 0.05, "train/loss_best": 2.1959054851531983, "validation/loss_best": 2.523315906524658, "validation/acc_best": 0.24640088593576967, "validation/f1_best": 0.17857435702432056} +{"epoch": 17, "train/lr": 2.030750443206766e-05, "train/loss": 2.2662338054180147, "train/grad": 0.24419414043426513, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.118016357421875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.11663818359375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.114456787109375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.112237548828125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.110103759765625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.107225341796875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.103572998046875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.098929443359375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.090321044921875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.075325927734375, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.0522119140625, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.003944091796875, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.9524603271484375, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.7174667358398437, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.5906231689453123, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.4991329193115233, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.4097118377685547, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.2864331436157226, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.183817157745361, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.1184117889404295, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.0334544658660887, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.9487464284896852, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.8516161715984345, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.7663976013660432, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.6847422266006469, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.5929124635457992, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.5194493681192398, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.4710641658306123, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.4127031195163726, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.370606797337532, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.2868067127466203, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.2243520066142082, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.1979012233018875, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.227742623090744, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.2008236259222032, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.206482741832733, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.2711040610074997, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.2935082203149795, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.3437276196479797, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.3270960652828216, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.3367427104711533, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.4133126139640808, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.171875, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02003950301092118, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02003659039735794, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02003241687081754, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020027329255826773, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02001857491210103, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.020009404984302818, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02000188029836863, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.020012113023549317, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.0201078559178859, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.020452865161933006, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02118527576792985, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.023139889128506185, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.025363563988357783, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.030832137940451504, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.033815083112567666, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.035748640932142736, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03733152410015464, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.039556584935635325, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.04001189725473523, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.039892319776117804, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.040007286202162505, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.040640714541077615, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.041769585423171524, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.042451218012720345, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.043322655875235795, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04436675639823079, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04515610512346029, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.045636775922030214, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04603802254423499, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.046219526510685685, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04668089084327221, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.046607994902879, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.046479505710303785, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.046345225758850576, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0461937720887363, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.046277308091521266, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04575671549886465, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.045580621231347325, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.04493045162409544, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.04543689334765077, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04588586788624525, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04440561629831791, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.120683431625366, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.120110273361206, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1191253662109375, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1181724071502686, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1171834468841553, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1157009601593018, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.113662004470825, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1105916500091553, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.104145050048828, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.0919432640075684, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.0729174613952637, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.037240505218506, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.0024988651275635, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.7712950706481934, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.6542246341705322, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.5919268131256104, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.5476787090301514, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.5121238231658936, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.5133049488067627, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.5309972763061523, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.5627248287200928, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.589674949645996, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.627187728881836, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.6863532066345215, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.738948345184326, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.800014019012451, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.8767504692077637, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.921337604522705, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.0061209201812744, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.108220100402832, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.2591185569763184, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.247545003890991, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.3286542892456055, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.3421952724456787, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.3179771900177, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.3360369205474854, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.329939603805542, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.257756233215332, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.2216875553131104, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.2683145999908447, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.29359769821167, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.1752519607543945, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06626061277224068, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.0664451827242525, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06736803248431156, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06681432262827612, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06755260243632337, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06995201181247693, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.07142857142857142, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.07456626061277224, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.0828719084533038, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.0946843853820598, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.10575858250276855, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.11332595053525286, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.16925064599483206, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.20653377630121816, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2201919527500923, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.23735695828719083, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.25212255444813586, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.24732373569582872, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.24492432631967515, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.23846437799926173, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.23864894795127353, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.23329641934293097, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.22739018087855298, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2233296419342931, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2157622739018088, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2100406053894426, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.20930232558139536, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.20450350682908822, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.2081949058693245, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.18992248062015504, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.20044296788482835, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.19767441860465115, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.18604651162790697, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.19176818014027316, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.18844592100406055, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.18844592100406055, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.19638242894056848, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.1893687707641196, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.19269102990033224, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.18733850129198967, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.2056109265411591, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.013129709577613244, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.01375770984594119, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.014736272061760139, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.015595700128794349, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.01618026160256025, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.016918534468810403, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.019564295309101212, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.0218809531081116, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.027506346662063464, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.036753388343750575, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.046199950542907986, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.05459032231472558, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.06294903599246826, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.10273883064058766, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.13420615111360742, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.14760294175213637, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.16551079475724964, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.178891054967223, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.18002164538879295, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.18089747037452955, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.17772866284993016, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.1799242525535051, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.18110609333355907, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.18153191917297484, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.18242676311847106, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.1794005306667469, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.174742843073979, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.17649595066784632, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.17039720600468988, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.17325454643016602, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.1610169455688023, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.16749309369803186, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.16510435003629587, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.15434337225729497, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.16342617648689625, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.16113377428575534, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.162605023355593, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.16751239930755743, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.1582921970963987, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.16278968700201837, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.16024657139126833, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.16865577825610115, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 17, "lr_best": 9.599999999999999e-05, "wd_best": 0.05, "train/loss_best": 2.2864331436157226, "validation/loss_best": 2.5121238231658936, "validation/acc_best": 0.25212255444813586, "validation/f1_best": 0.178891054967223} +{"epoch": 18, "train/lr": 7.5829098008590064e-06, "train/loss": 2.2503112995624543, "train/grad": 0.24197896741330624, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.119906005859375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.118546142578125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.1164111328125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.114306640625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.11212646484375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.10927978515625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.105731201171875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.10110107421875, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.09228759765625, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.07736328125, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.0540716552734377, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.0061016845703126, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.954595947265625, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.7171003723144533, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.5904039001464843, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.4996756744384765, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.4093773651123045, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.282209777832031, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.1787763023376465, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.1119524097442626, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.0249148750305177, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.9388933801651, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.8390602684020996, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.7582261037826539, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.6771632146835327, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.580997896194458, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.5060216665267945, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.452236726284027, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.394618324637413, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.3418686497211456, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.2563966119289398, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.2012390965223312, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.1722370690107347, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.1894392359256745, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.1625363022089004, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.1597405540943146, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.220592062473297, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.2274194312095643, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.2694040966033935, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.2480242675542832, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.2465010184049605, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.3232755964994432, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.171875, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020153897739946844, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020149119468405842, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020142982830293477, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020134881571866572, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.020126320309937, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02011689867358655, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.020110380901023746, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.020121437846682966, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.020222888342104853, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.020583240813575685, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.021337346378713845, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.023332855831831693, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02556210823357105, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.0309620030131191, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03399088065139949, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03597138003446162, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03760257076472044, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03977840207517147, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.04025075806304813, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.04015061533078551, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.04022601092234254, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.04080010883510113, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.04180599346756935, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04254249610006809, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04338325815275312, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04431809678673744, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04499679492786526, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04537445224821567, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04568647777661681, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.045746200066059825, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0459647499397397, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.046118182912468914, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04580016925930977, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04541734533384442, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04541983000934124, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04518290331587196, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.044457618314772844, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.044323143158107994, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.043686492890119555, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.044065152890980244, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04436439724639058, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04292542666196823, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.120673894882202, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1200854778289795, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.119112491607666, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.118149757385254, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1171774864196777, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.115665912628174, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.113621950149536, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.110529899597168, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.10404634475708, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.0917603969573975, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.0726280212402344, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.037006378173828, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.0022292137145996, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.770861864089966, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.6544017791748047, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.5934853553771973, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.5501842498779297, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.516839027404785, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.5193543434143066, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.5381507873535156, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.5702450275421143, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.5967466831207275, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.633655071258545, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.6924984455108643, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.7439916133880615, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.804959774017334, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.8797848224639893, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.9236178398132324, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.0085864067077637, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.1179676055908203, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.2597768306732178, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.2504069805145264, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.319230318069458, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.3469865322113037, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.3320233821868896, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.3418216705322266, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.334700345993042, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.2765750885009766, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.2504968643188477, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.3032333850860596, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.328350782394409, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.209172487258911, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.0651531930601698, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06681432262827612, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06681432262827612, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06755260243632337, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06681432262827612, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06773717238833518, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.07032115171650055, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.07142857142857142, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.07438169066076043, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.08176448874123293, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.09653008490217793, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.10594315245478036, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.11498708010335917, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.16648209671465486, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.2059800664451827, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2207456626061277, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.23569582871908454, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2502768549280177, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.24769287559985234, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2467700258397933, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2382798080472499, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2395717977113326, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2369878183831672, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2264673311184939, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.22462163159837578, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.21502399409376152, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.21077888519748986, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.20948689553340716, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.20468807678110004, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.20616463639719454, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.18752307124400147, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.1984126984126984, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.1984126984126984, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.1877076411960133, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.18992248062015504, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.1893687707641196, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.1878922111480251, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.19306016980435586, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.18844592100406055, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.19010705057216684, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.18715393133997785, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.20062753783684018, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.012775833885223681, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.01392583329354785, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.014656032592540308, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.015369471265331837, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.01589429839584174, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.01716639981246597, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.019442200627927276, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.02207429694418959, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.027772781215472827, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.036542057946029105, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.047831518517536115, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.05450274073994834, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.06320901418222218, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.10033047649814784, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.13346881074910347, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.14786287092033773, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.16448927496731341, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.17728335746218984, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.17973451003686478, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.18231200763833186, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.17688994764519306, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.18039333583599063, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.18414813462758173, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.18033089505855804, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.18388778591511878, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.17923661435577162, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.1774791250572545, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.17771673806966648, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.1712448440748956, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.1719553191856612, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.15985410378562478, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.16727128689984383, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.16842171730638233, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.15883603730439336, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.16239529276214096, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.1634576949983955, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.1625552114707549, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.16544721969625134, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.15908540604754648, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.15989693438219174, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.16219383592336092, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.16923730200136577, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 17, "lr_best": 9.599999999999999e-05, "wd_best": 0.05, "train/loss_best": 2.282209777832031, "validation/loss_best": 2.516839027404785, "validation/acc_best": 0.2502768549280177, "validation/f1_best": 0.17728335746218984} +{"epoch": 19, "train/lr": 1.0867618231465446e-06, "train/loss": 2.252056711912155, "train/grad": 0.23751143790781498, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.12120849609375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.119825439453125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.11791748046875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.115936279296875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.113955078125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.111214599609375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.107891845703125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.10337158203125, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.0948876953125, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.080146484375, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.057266845703125, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.0100091552734374, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.959073486328125, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.7234480285644533, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.595362548828125, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.5054683303833007, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.4159843063354494, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.2922454643249512, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.1904195976257324, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.125614070892334, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.0419623184204103, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.9570705556869508, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.8554586911201476, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.7723886954784394, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.6877084529399873, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.594275382757187, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.516485859155655, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.4619602370262146, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.399607058763504, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.3478748095035553, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.266387489438057, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.2084204179048539, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.1722584599256516, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.1863371169567107, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.1619326519966124, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.1553384366631507, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.2065269187092782, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.208521448969841, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.2527096623182297, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.216448149085045, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.2183150106668472, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.29842109978199, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.171875, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.019797220136970283, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.019794177883304655, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.019788276748731733, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.019781625443138183, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019773239060305058, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.019763321955688296, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.019755163202062248, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.019765811152756215, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.019863218208774923, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02021701667457819, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02096137933433056, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02293483798392117, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02512474057264626, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.030399888968095184, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03330415001139045, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03516250831075013, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03673642422072589, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.038999486044049264, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03951224438846111, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03942913306877017, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.039642947986721994, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0402920300513506, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.04129948092624545, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04198245631530881, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.042736783344298604, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.043713380713015795, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04437530711293221, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.044630818907171486, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04501529324799776, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.045082635544240475, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.045289983619004485, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.045221266094595196, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.044999512862414125, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04457012925297022, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04439491944387555, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.044302579741925, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04369454313069582, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04326587952673435, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.042653899900615216, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.04286762747913599, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0432940368168056, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.041748427338898185, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1206719875335693, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1200733184814453, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.119102716445923, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1181421279907227, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.117175579071045, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1156468391418457, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.113621473312378, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1105027198791504, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1040120124816895, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.0916969776153564, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.0725417137145996, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.03686785697937, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.0020740032196045, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.7705655097961426, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.6541783809661865, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.5931851863861084, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.549799680709839, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.5161631107330322, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.5182840824127197, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.5370750427246094, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.5690910816192627, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.595564603805542, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.632690191268921, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.6922242641448975, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.7444708347320557, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.8060660362243652, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.882720708847046, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.9270238876342773, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.0120530128479004, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.1226654052734375, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.2622053623199463, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.253533124923706, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.327308177947998, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.349688768386841, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.334758996963501, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.346827745437622, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.33886456489563, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.2784132957458496, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.247823715209961, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.302231788635254, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.3248484134674072, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.213104486465454, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06662975267626431, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06681432262827612, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06699889258028793, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06662975267626431, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06773717238833518, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.07032115171650055, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.07161314138058324, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.07475083056478406, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.08250276854928018, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.09634551495016612, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.10594315245478036, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.11480251015134736, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.16519010705057216, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.20653377630121816, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2216685123661868, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.23624953857511996, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2502768549280177, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.24769287559985234, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.24584717607973422, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2382798080472499, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.23864894795127353, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.23477297895902546, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.22591362126245848, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.22406792174234036, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.21428571428571427, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.21114802510151348, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.20837947582133629, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.20358065706902917, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.20376522702104097, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.1906607604282023, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.19878183831672203, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.1997046880767811, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.18715393133997785, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.18973791066814322, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.1878922111480251, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.1891842008121078, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.1921373200442968, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.18807678110003692, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.18844592100406055, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.18567737172388335, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.19656699889258028, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.012872560262131178, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.01375237899348481, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.014653976352961095, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.015239520208450173, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.015917145432899755, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.017223935910918073, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.019278496433607323, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.021884403475255835, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.027627108771317906, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.03676334518057198, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.04750085343453401, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.05456594258344727, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.06342865674111288, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.09977071227899252, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.13401917012714626, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.14817361474439783, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.16454636472533954, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.1774014094554318, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.1796088741689077, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.18201463337535695, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.17712578889993744, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.18037948168114282, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.18190094525004877, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.17984249570496694, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.18254065128591748, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.17857233131396258, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.17682809752097225, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.1766853382092739, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.16985112695190238, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.16923651251300484, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.1620050926646646, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.16686492560013858, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.16820316919633868, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.15751723587356292, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.16257829114183225, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.16150083326171497, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.1642139588787418, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.1644820421929451, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.15913371831051523, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.15974745101842705, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.1612246850644113, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.16480318714956635, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 17, "lr_best": 9.599999999999999e-05, "wd_best": 0.05, "train/loss_best": 2.2922454643249512, "validation/loss_best": 2.5161631107330322, "validation/acc_best": 0.2502768549280177, "validation/f1_best": 0.1774014094554318} diff --git a/data_scaling/n100_2/eval_v2/ppmi_dx__patch__logistic/config.yaml b/data_scaling/n100_2/eval_v2/ppmi_dx__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ce104a32f6ad19ec6543b1c8386e6621a4a831a2 --- /dev/null +++ b/data_scaling/n100_2/eval_v2/ppmi_dx__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n100_2; eval v2 (ppmi_dx patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n100_2/eval_v2/ppmi_dx__patch__logistic +model: flat_mae +representation: patch +dataset: ppmi_dx +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n100_2/eval_v2/ppmi_dx__patch__logistic +remote_dir: null diff --git a/data_scaling/n100_2/eval_v2/ppmi_dx__patch__logistic/eval_table.csv b/data_scaling/n100_2/eval_v2/ppmi_dx__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..b6332d63986186841c084489a0217f96b6d46545 --- /dev/null +++ b/data_scaling/n100_2/eval_v2/ppmi_dx__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,ppmi_dx,,0.005994842503189409,train,0.7153024911032029,0.016520085057862694,0.6691003297220914,0.02082509511284306,0.6646764175515929,0.018750712992667146 +flat_mae,patch,logistic,ppmi_dx,,0.005994842503189409,test,0.6,0.040802053869872776,0.5143273433705683,0.04965398392813147,0.5263835263835264,0.042938745894360135 +flat_mae,patch,logistic,ppmi_dx,1,0.005994842503189409,train,0.7135231316725978,0.01631439188201505,0.663329054343175,0.020611886255732823,0.6594947548704775,0.01833328114324436 +flat_mae,patch,logistic,ppmi_dx,1,0.005994842503189409,test,0.63,0.042910436958856525,0.5636277862955537,0.052661966233036685,0.5691850594227504,0.046042239444618674 +flat_mae,patch,logistic,ppmi_dx,2,0.046415888336127774,train,0.7864768683274022,0.015773893996631677,0.7582622410208617,0.019113118704548595,0.7474443374009848,0.01840776723249508 +flat_mae,patch,logistic,ppmi_dx,2,0.046415888336127774,test,0.64,0.04592576183363755,0.6043956043956044,0.04998183011352542,0.6027164685908319,0.04832999056978765 +flat_mae,patch,logistic,ppmi_dx,3,0.005994842503189409,train,0.7153024911032029,0.015883344490718515,0.6592132191313576,0.02115423474054879,0.6565912010276171,0.018258696135478276 +flat_mae,patch,logistic,ppmi_dx,3,0.005994842503189409,test,0.65,0.04314486759743272,0.612789025334661,0.049220156160455775,0.6107809847198642,0.04704041778046909 +flat_mae,patch,logistic,ppmi_dx,4,0.005994842503189409,train,0.708185053380783,0.017087586151540428,0.6542316926770708,0.022468428427599784,0.6516805823164205,0.019655067821652696 +flat_mae,patch,logistic,ppmi_dx,4,0.005994842503189409,test,0.67,0.04272147469364792,0.6239316239316239,0.05001481661872014,0.6218166383701189,0.04634679800420373 +flat_mae,patch,logistic,ppmi_dx,5,0.005994842503189409,train,0.7170818505338078,0.016962615813275492,0.669625658563638,0.021425456588974833,0.6649941126097195,0.01922405724162017 +flat_mae,patch,logistic,ppmi_dx,5,0.005994842503189409,test,0.6,0.04216384707305537,0.5324918186068257,0.04959622890638098,0.5398981324278438,0.04406331995021132 +flat_mae,patch,logistic,ppmi_dx,6,0.005994842503189409,train,0.7259786476868327,0.016077695927229144,0.6795088280601967,0.020725617690289806,0.6739590023549561,0.018581916855535726 +flat_mae,patch,logistic,ppmi_dx,6,0.005994842503189409,test,0.61,0.04257212233375265,0.5400400990682863,0.053588468882064436,0.547962648556876,0.0465224406717436 +flat_mae,patch,logistic,ppmi_dx,7,0.005994842503189409,train,0.7206405693950177,0.017091564186547645,0.6758045729948596,0.021330142173573492,0.6704934703489617,0.01935702196182345 +flat_mae,patch,logistic,ppmi_dx,7,0.005994842503189409,test,0.62,0.045790549243266344,0.5824175824175825,0.05140039161643029,0.5814940577249575,0.0493052508620957 +flat_mae,patch,logistic,ppmi_dx,8,0.005994842503189409,train,0.7295373665480427,0.015948610498142737,0.6805910770105144,0.020976781612839987,0.6751097195461357,0.01857700670580656 +flat_mae,patch,logistic,ppmi_dx,8,0.005994842503189409,test,0.63,0.03796197571254689,0.5460679671205987,0.0493629908011042,0.5589983022071308,0.04099909192315337 +flat_mae,patch,logistic,ppmi_dx,9,0.005994842503189409,train,0.708185053380783,0.01608591746433645,0.6530707499585949,0.021034040398669452,0.650810854206808,0.018378866078028758 +flat_mae,patch,logistic,ppmi_dx,9,0.005994842503189409,test,0.7,0.04008483004828634,0.6493688639551192,0.04955815575951578,0.6460101867572157,0.04458778549670705 +flat_mae,patch,logistic,ppmi_dx,10,0.005994842503189409,train,0.7135231316725978,0.0171899968496827,0.6654700064700989,0.02204748945542653,0.6612342110897024,0.019702650733169277 +flat_mae,patch,logistic,ppmi_dx,10,0.005994842503189409,test,0.66,0.04026114752462974,0.6026180458158018,0.04940179498687207,0.6035653650254669,0.044058183457382445 +flat_mae,patch,logistic,ppmi_dx,11,0.005994842503189409,train,0.6921708185053381,0.015906587983993827,0.62827703542276,0.021871040367901797,0.6299775208734746,0.018408929109689862 +flat_mae,patch,logistic,ppmi_dx,11,0.005994842503189409,test,0.71,0.040367194601557334,0.6579785352046232,0.05249896150907498,0.6540747028862479,0.04637017230555599 +flat_mae,patch,logistic,ppmi_dx,12,0.005994842503189409,train,0.7277580071174378,0.01627570607296824,0.6859750419072452,0.020539576396441987,0.6797527296082209,0.018813480601564493 +flat_mae,patch,logistic,ppmi_dx,12,0.005994842503189409,test,0.59,0.03768212308243791,0.48589341692789967,0.048101291326795116,0.5114601018675722,0.03938125257107833 +flat_mae,patch,logistic,ppmi_dx,13,0.005994842503189409,train,0.7455516014234875,0.0159324459454294,0.6990071272607424,0.02084989479431047,0.6915944123314065,0.018561044159025878 +flat_mae,patch,logistic,ppmi_dx,13,0.005994842503189409,test,0.61,0.04076264957040943,0.5481404240528328,0.0476285931785262,0.5530560271646858,0.04269974502653194 +flat_mae,patch,logistic,ppmi_dx,14,0.005994842503189409,train,0.7135231316725978,0.016760988195651406,0.6654700064700989,0.02092168468372803,0.6612342110897024,0.018759216816523625 +flat_mae,patch,logistic,ppmi_dx,14,0.005994842503189409,test,0.63,0.04125929713410058,0.5636277862955537,0.05111540045144083,0.5691850594227504,0.04462291644387462 +flat_mae,patch,logistic,ppmi_dx,15,0.000774263682681127,train,0.6779359430604982,0.013435630001758559,0.5822213460488002,0.021132902269048074,0.5992828088203811,0.015726861911244647 +flat_mae,patch,logistic,ppmi_dx,15,0.000774263682681127,test,0.65,0.037807935674934706,0.561128526645768,0.051910085885185295,0.5751273344651953,0.04183739426858399 +flat_mae,patch,logistic,ppmi_dx,16,0.005994842503189409,train,0.7170818505338078,0.01621505519867448,0.6642128403133984,0.02115705100025588,0.660645472061657,0.018555785857253537 +flat_mae,patch,logistic,ppmi_dx,16,0.005994842503189409,test,0.62,0.044401851312754974,0.5703301673450927,0.05105053289393683,0.5713073005093379,0.04730951064567415 +flat_mae,patch,logistic,ppmi_dx,17,0.005994842503189409,train,0.7170818505338078,0.016163901254105873,0.6642128403133984,0.02134610870153654,0.660645472061657,0.018728737181174543 +flat_mae,patch,logistic,ppmi_dx,17,0.005994842503189409,test,0.6,0.04346773976180496,0.5404411764705883,0.05029041679046397,0.5449915110356536,0.0459161432698831 +flat_mae,patch,logistic,ppmi_dx,18,0.005994842503189409,train,0.7277580071174378,0.016085303356814525,0.6820926148442554,0.020137719300222453,0.6762738171697709,0.018153015192587635 +flat_mae,patch,logistic,ppmi_dx,18,0.005994842503189409,test,0.6,0.03870594786334523,0.503968253968254,0.049218584060426285,0.5246179966044142,0.04075381041473166 +flat_mae,patch,logistic,ppmi_dx,19,0.005994842503189409,train,0.7170818505338078,0.0167423044424927,0.6653296030381681,0.02159041050329855,0.6615152001712695,0.019061115230025006 +flat_mae,patch,logistic,ppmi_dx,19,0.005994842503189409,test,0.67,0.04172106901794343,0.6239316239316239,0.049490862385113014,0.6218166383701189,0.0453922894938327 +flat_mae,patch,logistic,ppmi_dx,20,0.005994842503189409,train,0.7277580071174378,0.016235754959708935,0.6790165855989369,0.020988198501796836,0.6736646328409335,0.018687601010232438 +flat_mae,patch,logistic,ppmi_dx,20,0.005994842503189409,test,0.64,0.041204854082983966,0.5863970588235294,0.049194297300836264,0.5874363327674024,0.04454594742733914 +flat_mae,patch,logistic,ppmi_dx,21,0.005994842503189409,train,0.708185053380783,0.016869882956039417,0.6542316926770708,0.021678135416431234,0.6516805823164205,0.019000089805588573 +flat_mae,patch,logistic,ppmi_dx,21,0.005994842503189409,test,0.68,0.038892523703148905,0.6190476190476191,0.0513468515861432,0.6196943972835314,0.04421523557385577 +flat_mae,patch,logistic,ppmi_dx,22,0.005994842503189409,train,0.7206405693950177,0.01654090803987808,0.6673164441461585,0.021964085301139934,0.6635356454720617,0.019163192859379445 +flat_mae,patch,logistic,ppmi_dx,22,0.005994842503189409,test,0.67,0.03640313722744235,0.5862068965517242,0.052509104049083724,0.5963497453310695,0.04164026785669991 +flat_mae,patch,logistic,ppmi_dx,23,0.005994842503189409,train,0.7099644128113879,0.016931715666251717,0.6569102219825245,0.021935960025600467,0.6539953971312353,0.01926321221991437 +flat_mae,patch,logistic,ppmi_dx,23,0.005994842503189409,test,0.63,0.040382823081107154,0.5552350042072365,0.05091302108366085,0.5640916808149405,0.04379936892228645 +flat_mae,patch,logistic,ppmi_dx,24,0.046415888336127774,train,0.7900355871886121,0.015262579749129026,0.765435766836446,0.017948031860482997,0.7555528794690645,0.017511805908656145 +flat_mae,patch,logistic,ppmi_dx,24,0.046415888336127774,test,0.61,0.044820401604626435,0.5555555555555556,0.051532919967652704,0.5581494057724957,0.04742850911166385 +flat_mae,patch,logistic,ppmi_dx,25,0.005994842503189409,train,0.7117437722419929,0.016133887126185176,0.6595773320769955,0.02082471422945259,0.6563102119460501,0.01837291895255061 +flat_mae,patch,logistic,ppmi_dx,25,0.005994842503189409,test,0.64,0.03302713429893669,0.5322245322245323,0.049107400382764434,0.5568760611205432,0.037012675757886325 +flat_mae,patch,logistic,ppmi_dx,26,0.005994842503189409,train,0.7153024911032029,0.015485908956974577,0.660381919265168,0.020655972340912557,0.6574609291372298,0.017893566493181367 +flat_mae,patch,logistic,ppmi_dx,26,0.005994842503189409,test,0.71,0.04014581422763773,0.6640018537828757,0.048821927788670935,0.6591680814940577,0.04469626752528675 +flat_mae,patch,logistic,ppmi_dx,27,0.005994842503189409,train,0.7099644128113879,0.016396118510796493,0.6485060567341598,0.02207466486241246,0.6479073003639477,0.018752503338555465 +flat_mae,patch,logistic,ppmi_dx,27,0.005994842503189409,test,0.68,0.04177493985633012,0.6259934548854604,0.0523387484381796,0.6247877758913413,0.04650743637322754 +flat_mae,patch,logistic,ppmi_dx,28,0.005994842503189409,train,0.7206405693950177,0.015308959983350823,0.6661786126402209,0.020475814404259468,0.6626659173624492,0.017743727229416674 +flat_mae,patch,logistic,ppmi_dx,28,0.005994842503189409,test,0.65,0.037891735246620734,0.5706048337627285,0.050759494956658525,0.580220713073005,0.042060410990578515 +flat_mae,patch,logistic,ppmi_dx,29,0.046415888336127774,train,0.8042704626334519,0.0156014084701333,0.7813384267119412,0.018580949514531573,0.770592485549133,0.018311642104807672 +flat_mae,patch,logistic,ppmi_dx,29,0.046415888336127774,test,0.58,0.04576898513185539,0.525101763907734,0.05168690284102862,0.5288624787775891,0.04808365803548334 +flat_mae,patch,logistic,ppmi_dx,30,0.046415888336127774,train,0.7935943060498221,0.015861958499348777,0.7700004233640507,0.018393767825143813,0.760182509098694,0.017979064933797483 +flat_mae,patch,logistic,ppmi_dx,30,0.046415888336127774,test,0.58,0.03787378512903086,0.4900437105390966,0.04684998176587383,0.5084889643463497,0.03959443803516942 +flat_mae,patch,logistic,ppmi_dx,31,0.000774263682681127,train,0.6708185053380783,0.014313898096866312,0.5785142393838046,0.021520090021948494,0.5943721901091843,0.016429240383676047 +flat_mae,patch,logistic,ppmi_dx,31,0.000774263682681127,test,0.62,0.039726439558561005,0.5287698412698413,0.05187777671979085,0.5458404074702886,0.04277174115834105 +flat_mae,patch,logistic,ppmi_dx,32,0.005994842503189409,train,0.7099644128113879,0.016799720459433315,0.6557653645980122,0.021835011657125994,0.6531256690216227,0.019116800923815257 +flat_mae,patch,logistic,ppmi_dx,32,0.005994842503189409,test,0.64,0.03904866707072087,0.5628946090335114,0.05018249214870704,0.5721561969439728,0.042352789834437604 +flat_mae,patch,logistic,ppmi_dx,33,0.005994842503189409,train,0.7259786476868327,0.015791562225435086,0.6742249725220952,0.020634894886066988,0.6696103618068936,0.01806913216047467 +flat_mae,patch,logistic,ppmi_dx,33,0.005994842503189409,test,0.66,0.03916248715288649,0.5952380952380952,0.050275735918763984,0.5984719864176571,0.043338738493297606 +flat_mae,patch,logistic,ppmi_dx,34,0.005994842503189409,train,0.7046263345195729,0.015541499768947597,0.6476462412376118,0.020183599632715,0.6461812245771783,0.01755069189899168 +flat_mae,patch,logistic,ppmi_dx,34,0.005994842503189409,test,0.69,0.03852336433906051,0.627359057579036,0.0503782615777245,0.6277589134125636,0.0434694152844657 +flat_mae,patch,logistic,ppmi_dx,35,0.005994842503189409,train,0.7295373665480427,0.016188994643711478,0.6805910770105144,0.021025943093741315,0.6751097195461357,0.018639448382182185 +flat_mae,patch,logistic,ppmi_dx,35,0.005994842503189409,test,0.59,0.04437016565215866,0.539894512400404,0.05015394995738416,0.5420203735144312,0.04660642171912963 +flat_mae,patch,logistic,ppmi_dx,36,0.005994842503189409,train,0.7259786476868327,0.01682306343476117,0.6824858757062147,0.020998055402496402,0.6765681866837936,0.019125538063857552 +flat_mae,patch,logistic,ppmi_dx,36,0.005994842503189409,test,0.6,0.04064846368560563,0.5238095238095238,0.04997210662054391,0.5348047538200339,0.04333897052443603 +flat_mae,patch,logistic,ppmi_dx,37,0.005994842503189409,train,0.697508896797153,0.01556177708903672,0.643932617769827,0.02011428217162366,0.6421403339755941,0.017688627315029813 +flat_mae,patch,logistic,ppmi_dx,37,0.005994842503189409,test,0.64,0.03598490794763828,0.54337899543379,0.05071481888525453,0.5619694397283531,0.03983552538358852 +flat_mae,patch,logistic,ppmi_dx,38,0.005994842503189409,train,0.7224199288256228,0.0169668519299116,0.6732558139534883,0.0219475950893339,0.6684596446157139,0.019555377752348668 +flat_mae,patch,logistic,ppmi_dx,38,0.005994842503189409,test,0.62,0.045451244207392175,0.5824175824175825,0.05028122815067808,0.5814940577249575,0.04846814035538513 +flat_mae,patch,logistic,ppmi_dx,39,0.005994842503189409,train,0.7188612099644128,0.015799661325733454,0.6668817527010804,0.021151709245616546,0.6629602868764719,0.01853109571492047 +flat_mae,patch,logistic,ppmi_dx,39,0.005994842503189409,test,0.69,0.040021314321246365,0.6408295678368672,0.048852209110763316,0.6379456706281834,0.044317865972324656 +flat_mae,patch,logistic,ppmi_dx,40,0.000774263682681127,train,0.6690391459074733,0.013446934139057506,0.5658393141945773,0.0212125691132005,0.587708734746307,0.01552691517548919 +flat_mae,patch,logistic,ppmi_dx,40,0.000774263682681127,test,0.62,0.034462611624773896,0.5062370062370062,0.049078899509542875,0.5356536502546689,0.03759435615344903 +flat_mae,patch,logistic,ppmi_dx,41,0.005994842503189409,train,0.7064056939501779,0.01615097453859584,0.6527005314547027,0.021128429551363345,0.6502354956112182,0.018523832910858202 +flat_mae,patch,logistic,ppmi_dx,41,0.005994842503189409,test,0.69,0.04130051331400131,0.6570417081535569,0.04723612944174216,0.6532258064516129,0.04538879738518044 +flat_mae,patch,logistic,ppmi_dx,42,0.005994842503189409,train,0.7064056939501779,0.015645641872368464,0.6454665366748867,0.02083450030262842,0.6450171269535432,0.017730155140082058 +flat_mae,patch,logistic,ppmi_dx,42,0.005994842503189409,test,0.62,0.038543481939233265,0.5287698412698413,0.05073246341509449,0.5458404074702886,0.04172732635665055 +flat_mae,patch,logistic,ppmi_dx,43,0.005994842503189409,train,0.7188612099644128,0.016872987629002637,0.6711843820357862,0.021212099409995386,0.6664391993149219,0.019010560155319513 +flat_mae,patch,logistic,ppmi_dx,43,0.005994842503189409,test,0.61,0.03915206763377894,0.5215311004784688,0.05065566205039339,0.5377758913412564,0.042122551175577574 +flat_mae,patch,logistic,ppmi_dx,44,0.005994842503189409,train,0.7117437722419929,0.016385086755815268,0.6595773320769955,0.02105166406295296,0.6563102119460501,0.018572455839363882 +flat_mae,patch,logistic,ppmi_dx,44,0.005994842503189409,test,0.66,0.04202130412064813,0.609375,0.050411263485613304,0.6086587436332768,0.046004809976274534 +flat_mae,patch,logistic,ppmi_dx,45,0.005994842503189409,train,0.7046263345195729,0.016438584918574993,0.6476462412376118,0.021732210662290117,0.6461812245771783,0.018790505723626543 +flat_mae,patch,logistic,ppmi_dx,45,0.005994842503189409,test,0.72,0.03439185950192284,0.6448503297818367,0.051925048013964,0.6468590831918506,0.041443430180353624 +flat_mae,patch,logistic,ppmi_dx,46,0.005994842503189409,train,0.7170818505338078,0.016334277283652637,0.6664290007204638,0.021042751295717112,0.662384928280882,0.01861900369070494 +flat_mae,patch,logistic,ppmi_dx,46,0.005994842503189409,test,0.62,0.04159528338646102,0.5634191176470589,0.05012865900634107,0.566213921901528,0.044979817256958564 +flat_mae,patch,logistic,ppmi_dx,47,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,47,2.782559402207126,test,0.64,0.049109978619421124,0.6179966044142615,0.052262286104529654,0.6179966044142615,0.052196435050811944 +flat_mae,patch,logistic,ppmi_dx,48,0.005994842503189409,train,0.708185053380783,0.015766761641501426,0.6469849246231156,0.02140906933693624,0.6464622136587455,0.018151831002568522 +flat_mae,patch,logistic,ppmi_dx,48,0.005994842503189409,test,0.72,0.039771723623700295,0.6727442730247779,0.04999904554435399,0.66723259762309,0.04502884291433832 +flat_mae,patch,logistic,ppmi_dx,49,0.000774263682681127,train,0.6672597864768683,0.01348788204161672,0.5644945403311025,0.02095805485312623,0.5862636480411046,0.015479680422946883 +flat_mae,patch,logistic,ppmi_dx,49,0.000774263682681127,test,0.64,0.02702327885361065,0.49181253529079616,0.047213695123054226,0.5415959252971138,0.03097166462176472 +flat_mae,patch,logistic,ppmi_dx,50,0.005994842503189409,train,0.7135231316725978,0.016518532980860288,0.6622331390943061,0.021367599707894945,0.6586250267608649,0.01889358133279687 +flat_mae,patch,logistic,ppmi_dx,50,0.005994842503189409,test,0.63,0.03993476680788308,0.5460679671205987,0.05162370782909036,0.5589983022071308,0.04307981744209661 +flat_mae,patch,logistic,ppmi_dx,51,0.046415888336127774,train,0.7882562277580071,0.016253701478981173,0.7672051600686431,0.018594447397554933,0.7593261614215372,0.01838895906175846 +flat_mae,patch,logistic,ppmi_dx,51,0.046415888336127774,test,0.59,0.042882513918845754,0.5327635327635327,0.04932912415158319,0.5369269949066213,0.04516589259870449 +flat_mae,patch,logistic,ppmi_dx,52,0.005994842503189409,train,0.7188612099644128,0.016697407262146958,0.6701338841255925,0.021731224088693317,0.6655694712053094,0.019386289660493838 +flat_mae,patch,logistic,ppmi_dx,52,0.005994842503189409,test,0.61,0.041539085208993236,0.5481404240528328,0.04978373547944435,0.5530560271646858,0.044727915809697955 +flat_mae,patch,logistic,ppmi_dx,53,0.046415888336127774,train,0.7793594306049823,0.016640521828669055,0.7541383835960542,0.019543465869590992,0.7451429030186256,0.0190786949992221 +flat_mae,patch,logistic,ppmi_dx,53,0.046415888336127774,test,0.59,0.04521493558548989,0.5577607593571352,0.04825094915138925,0.5573005093378608,0.04730410028332581 +flat_mae,patch,logistic,ppmi_dx,54,0.000774263682681127,train,0.6832740213523132,0.014305350012097037,0.5953236245954693,0.02136379801332593,0.6079667094840505,0.016479381002349924 +flat_mae,patch,logistic,ppmi_dx,54,0.000774263682681127,test,0.63,0.033149696831192905,0.5250930560903607,0.04619096011942813,0.548811544991511,0.03595634087392253 +flat_mae,patch,logistic,ppmi_dx,55,0.000774263682681127,train,0.6761565836298933,0.013749003355657987,0.580837636259323,0.021321830898680116,0.5978377221151787,0.016032566928480676 +flat_mae,patch,logistic,ppmi_dx,55,0.000774263682681127,test,0.62,0.03732501038178022,0.5180111618467782,0.05013013816959532,0.5407470288624787,0.03988770476563775 +flat_mae,patch,logistic,ppmi_dx,56,0.046415888336127774,train,0.798932384341637,0.015292621165257342,0.7762321046641508,0.01787540569017623,0.766257225433526,0.01762090878485072 +flat_mae,patch,logistic,ppmi_dx,56,0.046415888336127774,test,0.56,0.04533953683045295,0.4944852941176471,0.05044309423772322,0.5025466893039049,0.046417784845873875 +flat_mae,patch,logistic,ppmi_dx,57,0.005994842503189409,train,0.7135231316725978,0.015732981378490826,0.655284342224068,0.021380304992341698,0.6534066581031899,0.018276932455398725 +flat_mae,patch,logistic,ppmi_dx,57,0.005994842503189409,test,0.72,0.03778081523736618,0.6727442730247779,0.047275683435971085,0.66723259762309,0.04282910942232186 +flat_mae,patch,logistic,ppmi_dx,58,0.005994842503189409,train,0.702846975088968,0.015884001488093866,0.6461264087414551,0.021027970056869686,0.644736137871976,0.018201630032719934 +flat_mae,patch,logistic,ppmi_dx,58,0.005994842503189409,test,0.65,0.0410247291276859,0.5872154735228211,0.051205500922890584,0.5904074702886248,0.04502145278958038 +flat_mae,patch,logistic,ppmi_dx,59,0.005994842503189409,train,0.699288256227758,0.015605858267792063,0.6488473980959424,0.019393179290955204,0.6461946050096339,0.017358715623232965 +flat_mae,patch,logistic,ppmi_dx,59,0.005994842503189409,test,0.71,0.039740533463958436,0.6579785352046232,0.05132574233177451,0.6540747028862479,0.04541887595952848 +flat_mae,patch,logistic,ppmi_dx,60,0.005994842503189409,train,0.7064056939501779,0.01586953048922224,0.6549645587989061,0.02029067933270415,0.6519749518304432,0.017983169127145718 +flat_mae,patch,logistic,ppmi_dx,60,0.005994842503189409,test,0.62,0.04121817075028925,0.5558672276764843,0.0503620633793448,0.5611205432937181,0.0445764322271891 +flat_mae,patch,logistic,ppmi_dx,61,0.005994842503189409,train,0.7117437722419929,0.01688236123577284,0.6561366932559827,0.022226108636312428,0.6537010276172126,0.019309389307018827 +flat_mae,patch,logistic,ppmi_dx,61,0.005994842503189409,test,0.65,0.043219921332644747,0.6011396011396011,0.05119197249407092,0.6005942275042444,0.0471260167275717 +flat_mae,patch,logistic,ppmi_dx,62,0.046415888336127774,train,0.7775800711743772,0.016486771348972605,0.7554676051141209,0.01883644245477349,0.7480464568614857,0.01863614308547962 +flat_mae,patch,logistic,ppmi_dx,62,0.046415888336127774,test,0.6,0.03938428112838928,0.5238095238095238,0.04866644159022544,0.5348047538200339,0.04176885638997226 +flat_mae,patch,logistic,ppmi_dx,63,0.046415888336127774,train,0.7846975088967971,0.014844927746161531,0.7578923008455718,0.017807707888142746,0.7477387069150074,0.017283577844053842 +flat_mae,patch,logistic,ppmi_dx,63,0.046415888336127774,test,0.6,0.04564301041780658,0.5404411764705883,0.0517619119809781,0.5449915110356536,0.047364601556885995 +flat_mae,patch,logistic,ppmi_dx,64,0.005994842503189409,train,0.7099644128113879,0.016081542617809634,0.6569102219825245,0.020897737470472598,0.6539953971312353,0.018345376230333423 +flat_mae,patch,logistic,ppmi_dx,64,0.005994842503189409,test,0.65,0.03736271403418119,0.561128526645768,0.05140248315147535,0.5751273344651953,0.041612312795271555 +flat_mae,patch,logistic,ppmi_dx,65,0.005994842503189409,train,0.7241992882562278,0.01566135526245835,0.6748207239727791,0.02050952735936154,0.6699047313209163,0.018179591657237256 +flat_mae,patch,logistic,ppmi_dx,65,0.005994842503189409,test,0.59,0.03916228287523596,0.5071523019593701,0.047450339685443495,0.5216468590831919,0.040944251226707896 +flat_mae,patch,logistic,ppmi_dx,66,0.005994842503189409,train,0.7366548042704626,0.016422854828325385,0.6919954970968124,0.0208181159055745,0.6852387069150075,0.018749211681184416 +flat_mae,patch,logistic,ppmi_dx,66,0.005994842503189409,test,0.55,0.04797634417085153,0.4950061721467849,0.05211928067101346,0.49957555178268254,0.04916856987089666 +flat_mae,patch,logistic,ppmi_dx,67,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,67,1291.5496650148827,test,0.55,0.04838594837346892,0.529239460194581,0.049542449813314356,0.5301358234295416,0.04997580956441811 +flat_mae,patch,logistic,ppmi_dx,68,0.000774263682681127,train,0.6761565836298933,0.013932865646640724,0.5751761031366295,0.02217194056380736,0.5952285377863413,0.016298445457097473 +flat_mae,patch,logistic,ppmi_dx,68,0.000774263682681127,test,0.66,0.03531396890750174,0.5582120582120582,0.0535784203301751,0.5780984719864176,0.04053239220724763 +flat_mae,patch,logistic,ppmi_dx,69,0.005994842503189409,train,0.7046263345195729,0.01609962642890557,0.6534318023091668,0.02044298765464718,0.6505298651252408,0.018175170694004667 +flat_mae,patch,logistic,ppmi_dx,69,0.005994842503189409,test,0.62,0.04072610465045731,0.5634191176470589,0.048357911783930285,0.566213921901528,0.043730514113313496 +flat_mae,patch,logistic,ppmi_dx,70,0.005994842503189409,train,0.7188612099644128,0.01617506389750901,0.6657632834966951,0.02146420191400836,0.6620905587668594,0.018772546955213798 +flat_mae,patch,logistic,ppmi_dx,70,0.005994842503189409,test,0.62,0.04763391648814949,0.5876736111111112,0.051364323540812766,0.5865874363327674,0.05016625484246288 +flat_mae,patch,logistic,ppmi_dx,71,0.005994842503189409,train,0.7153024911032029,0.016277394898479458,0.661532438983995,0.021056072386450127,0.6583306572468423,0.01845193876265782 +flat_mae,patch,logistic,ppmi_dx,71,0.005994842503189409,test,0.62,0.03666551513343295,0.5062370062370062,0.04987116846842002,0.5356536502546689,0.039321155004028385 +flat_mae,patch,logistic,ppmi_dx,72,0.005994842503189409,train,0.7188612099644128,0.01654098460430001,0.6701338841255925,0.021434569845352903,0.6655694712053094,0.019089365018635233 +flat_mae,patch,logistic,ppmi_dx,72,0.005994842503189409,test,0.64,0.0402385685630093,0.5714285714285714,0.049453167099405296,0.5772495755517827,0.043183324082843456 +flat_mae,patch,logistic,ppmi_dx,73,0.005994842503189409,train,0.7099644128113879,0.01670387878971518,0.6580372774681484,0.021620059215116137,0.6548651252408478,0.0191127216811577 +flat_mae,patch,logistic,ppmi_dx,73,0.005994842503189409,test,0.65,0.039478252240949065,0.5872154735228211,0.05041277335334933,0.5904074702886248,0.04394198604378528 +flat_mae,patch,logistic,ppmi_dx,74,0.005994842503189409,train,0.7295373665480427,0.01712681618770836,0.6846659283868586,0.021739875272184996,0.6785886319845857,0.019656189712996378 +flat_mae,patch,logistic,ppmi_dx,74,0.005994842503189409,test,0.59,0.04245831367353159,0.5249681381068243,0.050546238173920344,0.5318336162988115,0.045291278414387945 +flat_mae,patch,logistic,ppmi_dx,75,0.005994842503189409,train,0.7099644128113879,0.016460290038810096,0.6534211073908026,0.02156890265589735,0.6513862128023977,0.018662831059647642 +flat_mae,patch,logistic,ppmi_dx,75,0.005994842503189409,test,0.68,0.04007837821070108,0.6190476190476191,0.05235607937022129,0.6196943972835314,0.04532605845107308 +flat_mae,patch,logistic,ppmi_dx,76,0.005994842503189409,train,0.7153024911032029,0.01619548829962865,0.6626650660264106,0.0212505248724748,0.6592003853564548,0.01864991901521566 +flat_mae,patch,logistic,ppmi_dx,76,0.005994842503189409,test,0.67,0.04002636631022106,0.6033177064551027,0.05279232204877245,0.6065365025466893,0.044555232474604786 +flat_mae,patch,logistic,ppmi_dx,77,0.005994842503189409,train,0.7170818505338078,0.016415371042993454,0.6685767694413227,0.02107424753029262,0.664124384500107,0.018797996000258135 +flat_mae,patch,logistic,ppmi_dx,77,0.005994842503189409,test,0.66,0.03726446028054076,0.587178241864983,0.04976537548148558,0.5933786078098472,0.04188161480303919 +flat_mae,patch,logistic,ppmi_dx,78,0.005994842503189409,train,0.7277580071174378,0.015449451973847493,0.6790165855989369,0.020052072573631393,0.6736646328409335,0.01781846503591153 +flat_mae,patch,logistic,ppmi_dx,78,0.005994842503189409,test,0.64,0.04025027204877006,0.5535714285714286,0.05303531035730347,0.567062818336163,0.043632387297455555 +flat_mae,patch,logistic,ppmi_dx,79,0.005994842503189409,train,0.7117437722419929,0.01631346032355242,0.657301594471295,0.021497629672089513,0.6545707557268251,0.01873375445024533 +flat_mae,patch,logistic,ppmi_dx,79,0.005994842503189409,test,0.66,0.043270202218154696,0.609375,0.051877727204190933,0.6086587436332768,0.04742959015462406 +flat_mae,patch,logistic,ppmi_dx,80,0.005994842503189409,train,0.7259786476868327,0.016639514531741253,0.6763883280238105,0.021274006272038227,0.6713498180261186,0.018882726322535435 +flat_mae,patch,logistic,ppmi_dx,80,0.005994842503189409,test,0.63,0.04036043111761816,0.5636277862955537,0.050034068674970324,0.5691850594227504,0.04374659582168292 +flat_mae,patch,logistic,ppmi_dx,81,0.005994842503189409,train,0.7046263345195729,0.015235478112474433,0.6488399054458949,0.020155483288900364,0.6470509526867909,0.01748183200110373 +flat_mae,patch,logistic,ppmi_dx,81,0.005994842503189409,test,0.71,0.03671225953275009,0.6514004087029691,0.04827103450956636,0.648981324278438,0.04163552090513263 +flat_mae,patch,logistic,ppmi_dx,82,0.005994842503189409,train,0.7277580071174378,0.015714403115220883,0.6746836161398331,0.021110969226116032,0.6701857204024835,0.01833397469920308 +flat_mae,patch,logistic,ppmi_dx,82,0.005994842503189409,test,0.63,0.04082871048661715,0.5552350042072365,0.052043475908389446,0.5640916808149405,0.04447354381589438 +flat_mae,patch,logistic,ppmi_dx,83,0.005994842503189409,train,0.7206405693950177,0.016351225379035224,0.6748008830803138,0.020264768443707903,0.6696237422393492,0.018314304626569248 +flat_mae,patch,logistic,ppmi_dx,83,0.005994842503189409,test,0.55,0.04488474128253386,0.4950061721467849,0.049063062388602766,0.49957555178268254,0.046268002758962444 +flat_mae,patch,logistic,ppmi_dx,84,0.005994842503189409,train,0.7206405693950177,0.016366046223926088,0.6727456151087274,0.020882336121732615,0.6678842860201242,0.01867328477500557 +flat_mae,patch,logistic,ppmi_dx,84,0.005994842503189409,test,0.62,0.043030221937610316,0.5558672276764843,0.05216053561327231,0.5611205432937181,0.046595878313170074 +flat_mae,patch,logistic,ppmi_dx,85,0.005994842503189409,train,0.7135231316725978,0.016993959832447652,0.6599891024557052,0.022319674804665807,0.6568855705416399,0.01947134647211207 +flat_mae,patch,logistic,ppmi_dx,85,0.005994842503189409,test,0.67,0.039275086250700954,0.6108031607500884,0.04879808525631727,0.6116298811544991,0.04315210083848125 +flat_mae,patch,logistic,ppmi_dx,86,0.005994842503189409,train,0.7117437722419929,0.015953334180295767,0.6617828432173797,0.020383113586638035,0.6580496681652751,0.01808131697880107 +flat_mae,patch,logistic,ppmi_dx,86,0.005994842503189409,test,0.6,0.033981753927659474,0.4802494802494802,0.04660085655962064,0.5144312393887945,0.035912207151033125 +flat_mae,patch,logistic,ppmi_dx,87,0.005994842503189409,train,0.7330960854092526,0.015537177036549062,0.681608049311095,0.020634915023924547,0.6762604367373153,0.018047021378966827 +flat_mae,patch,logistic,ppmi_dx,87,0.005994842503189409,test,0.64,0.04130024213004084,0.5714285714285714,0.052669706395553774,0.5772495755517827,0.04549350974532303 +flat_mae,patch,logistic,ppmi_dx,88,0.046415888336127774,train,0.797153024911032,0.01577350861147377,0.7727988425039363,0.01859601822617641,0.7622029543994862,0.018166409367622836 +flat_mae,patch,logistic,ppmi_dx,88,0.046415888336127774,test,0.67,0.04419579618017985,0.6239316239316239,0.053049282059780815,0.6218166383701189,0.04895413199911891 +flat_mae,patch,logistic,ppmi_dx,89,0.000774263682681127,train,0.6637010676156584,0.01202930096729991,0.5450562158689367,0.020377925356645125,0.5764156497538,0.014065314337778339 +flat_mae,patch,logistic,ppmi_dx,89,0.000774263682681127,test,0.69,0.027810789273229896,0.5689055764149632,0.05245936438260108,0.5971986417657046,0.034939224103906354 +flat_mae,patch,logistic,ppmi_dx,90,0.005994842503189409,train,0.7046263345195729,0.01625139495694541,0.6523106738223017,0.020509931953599615,0.6496601370156283,0.01822096203518249 +flat_mae,patch,logistic,ppmi_dx,90,0.005994842503189409,test,0.6,0.04426409380073199,0.554367201426025,0.048642700141135586,0.5551782682512734,0.04605045177256103 +flat_mae,patch,logistic,ppmi_dx,91,0.005994842503189409,train,0.7277580071174378,0.016149848487215075,0.6768840538864777,0.021186083712887542,0.6719251766217085,0.01859548411006172 +flat_mae,patch,logistic,ppmi_dx,91,0.005994842503189409,test,0.66,0.04199055131812394,0.6026180458158018,0.05130547396716849,0.6035653650254669,0.045415148621044066 +flat_mae,patch,logistic,ppmi_dx,92,0.005994842503189409,train,0.7135231316725978,0.016226360603503288,0.6611199125103463,0.02119540639993342,0.6577552986512524,0.018643524156222183 +flat_mae,patch,logistic,ppmi_dx,92,0.005994842503189409,test,0.71,0.03867410503166169,0.6579785352046232,0.048854507627825276,0.6540747028862479,0.0435394465880211 +flat_mae,patch,logistic,ppmi_dx,93,0.005994842503189409,train,0.7241992882562278,0.015637505924705294,0.6737489840938117,0.020497665761945463,0.6690350032113038,0.01807448521474058 +flat_mae,patch,logistic,ppmi_dx,93,0.005994842503189409,test,0.61,0.04276084190003747,0.5555555555555556,0.05006736013708233,0.5581494057724957,0.04596553901604652 +flat_mae,patch,logistic,ppmi_dx,94,0.005994842503189409,train,0.7224199288256228,0.01676074942666491,0.6753466050479915,0.021199399363619587,0.670199100834939,0.018997352287090805 +flat_mae,patch,logistic,ppmi_dx,94,0.005994842503189409,test,0.62,0.04355685939091569,0.5703301673450927,0.04963352692359453,0.5713073005093379,0.04604799234924177 +flat_mae,patch,logistic,ppmi_dx,95,0.000774263682681127,train,0.693950177935943,0.014771212749126711,0.612227214377407,0.021799483768878202,0.6209858702633269,0.017155381305588854 +flat_mae,patch,logistic,ppmi_dx,95,0.000774263682681127,test,0.6,0.02591872682058284,0.435347261434218,0.03782882002620203,0.499151103565365,0.02658462348272596 +flat_mae,patch,logistic,ppmi_dx,96,0.005994842503189409,train,0.7206405693950177,0.01626279035318215,0.6737813106571772,0.02104805905053743,0.6687540141297367,0.018881722342296948 +flat_mae,patch,logistic,ppmi_dx,96,0.005994842503189409,test,0.58,0.03895564657402056,0.4900437105390966,0.04700482821225266,0.5084889643463497,0.040280001252098706 +flat_mae,patch,logistic,ppmi_dx,97,0.000774263682681127,train,0.6725978647686833,0.01289734116203946,0.5665325285043594,0.02094714353615797,0.5897291800470991,0.015115434323279974 +flat_mae,patch,logistic,ppmi_dx,97,0.000774263682681127,test,0.66,0.031188562005966217,0.5466666666666666,0.050217985971710136,0.5730050933786077,0.03609859343964925 +flat_mae,patch,logistic,ppmi_dx,98,0.046415888336127774,train,0.7811387900355872,0.015376154734548881,0.7532298311813025,0.018465402657404217,0.7431090772853779,0.017830720393493024 +flat_mae,patch,logistic,ppmi_dx,98,0.046415888336127774,test,0.67,0.04562281885197362,0.6239316239316239,0.0542794661392299,0.6218166383701189,0.050046180575418465 +flat_mae,patch,logistic,ppmi_dx,99,0.005994842503189409,train,0.7206405693950177,0.015854933656108412,0.6673164441461585,0.020674144792773037,0.6635356454720617,0.0181203580187174 +flat_mae,patch,logistic,ppmi_dx,99,0.005994842503189409,test,0.66,0.04129570922020834,0.5952380952380952,0.051478102868304,0.5984719864176571,0.04498487328906596 +flat_mae,patch,logistic,ppmi_dx,100,0.005994842503189409,train,0.7099644128113879,0.01701832010092387,0.6569102219825245,0.02192360850433153,0.6539953971312353,0.01931226359302563 +flat_mae,patch,logistic,ppmi_dx,100,0.005994842503189409,test,0.64,0.043991503725151294,0.5863970588235294,0.05331733670580259,0.5874363327674024,0.04829926095997664 diff --git a/data_scaling/n100_2/eval_v2/ppmi_dx__patch__logistic/log.txt b/data_scaling/n100_2/eval_v2/ppmi_dx__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..227736c60db689aaa77e8976b11f6da7c3e87a54 --- /dev/null +++ b/data_scaling/n100_2/eval_v2/ppmi_dx__patch__logistic/log.txt @@ -0,0 +1,247 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: 58906bf7243fb545e1349221e6921a1797e2e666, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-26 17:21:08 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n100_2; eval v2 (ppmi_dx patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n100_2/eval_v2/ppmi_dx__patch__logistic +model: flat_mae +representation: patch +dataset: ppmi_dx +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n100_2/eval_v2/ppmi_dx__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: ppmi_dx (flat) +train (n=463): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'dir', 'sex', 'age', 'age_bin', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 463 +}), + labels=['PD' 'Prodromal'], + counts=[178 285] +) + +validation (n=99): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'dir', 'sex', 'age', 'age_bin', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 99 +}), + labels=['PD' 'Prodromal'], + counts=[39 60] +) + +test (n=100): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'dir', 'sex', 'age', 'age_bin', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 100 +}), + labels=['PD' 'Prodromal'], + counts=[37 63] +) + +extracting features for all splits +extract (train) [ 0/232] eta: 0:14:11 time: 3.6686 data: 2.9641 max mem: 2698 +extract (train) [ 20/232] eta: 0:01:13 time: 0.1820 data: 0.0567 max mem: 2851 +extract (train) [ 40/232] eta: 0:00:49 time: 0.1645 data: 0.0451 max mem: 2851 +extract (train) [ 60/232] eta: 0:00:39 time: 0.1773 data: 0.0526 max mem: 2851 +extract (train) [ 80/232] eta: 0:00:33 time: 0.1798 data: 0.0517 max mem: 2851 +extract (train) [100/232] eta: 0:00:27 time: 0.1647 data: 0.0489 max mem: 2851 +extract (train) [120/232] eta: 0:00:22 time: 0.1620 data: 0.0463 max mem: 2851 +extract (train) [140/232] eta: 0:00:18 time: 0.1739 data: 0.0476 max mem: 2851 +extract (train) [160/232] eta: 0:00:13 time: 0.1698 data: 0.0488 max mem: 2851 +extract (train) [180/232] eta: 0:00:09 time: 0.1679 data: 0.0498 max mem: 2851 +extract (train) [200/232] eta: 0:00:06 time: 0.1709 data: 0.0519 max mem: 2851 +extract (train) [220/232] eta: 0:00:02 time: 0.1433 data: 0.0391 max mem: 2851 +extract (train) [231/232] eta: 0:00:00 time: 0.1395 data: 0.0398 max mem: 2851 +extract (train) Total time: 0:00:42 (0.1839 s / it) +extract (validation) [ 0/50] eta: 0:02:54 time: 3.4954 data: 3.3335 max mem: 2851 +extract (validation) [20/50] eta: 0:00:11 time: 0.2114 data: 0.0625 max mem: 2851 +extract (validation) [40/50] eta: 0:00:02 time: 0.1345 data: 0.0326 max mem: 2851 +extract (validation) [49/50] eta: 0:00:00 time: 0.1309 data: 0.0320 max mem: 2851 +extract (validation) Total time: 0:00:11 (0.2374 s / it) +extract (test) [ 0/50] eta: 0:02:53 time: 3.4723 data: 3.3099 max mem: 2851 +extract (test) [20/50] eta: 0:00:10 time: 0.2066 data: 0.0609 max mem: 2851 +extract (test) [40/50] eta: 0:00:02 time: 0.1301 data: 0.0309 max mem: 2851 +extract (test) [49/50] eta: 0:00:00 time: 0.1335 data: 0.0343 max mem: 2851 +extract (test) Total time: 0:00:11 (0.2339 s / it) +feature extraction time: 0:01:06 +train features: (463, 768) +validation features: (99, 768) +test features: (100, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|----------:|:--------|-------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | ppmi_dx | | 0.0059948 | train | 0.7153 | 0.01652 | 0.6691 | 0.020825 | 0.66468 | 0.018751 | +| flat_mae | patch | logistic | ppmi_dx | | 0.0059948 | test | 0.6 | 0.040802 | 0.51433 | 0.049654 | 0.52638 | 0.042939 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 1, "C": 0.005994842503189409, "split": "test", "acc": 0.63, "acc_std": 0.042910436958856525, "f1": 0.5636277862955537, "f1_std": 0.052661966233036685, "bacc": 0.5691850594227504, "bacc_std": 0.046042239444618674} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 2, "C": 0.046415888336127774, "split": "test", "acc": 0.64, "acc_std": 0.04592576183363755, "f1": 0.6043956043956044, "f1_std": 0.04998183011352542, "bacc": 0.6027164685908319, "bacc_std": 0.04832999056978765} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 3, "C": 0.005994842503189409, "split": "test", "acc": 0.65, "acc_std": 0.04314486759743272, "f1": 0.612789025334661, "f1_std": 0.049220156160455775, "bacc": 0.6107809847198642, "bacc_std": 0.04704041778046909} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 4, "C": 0.005994842503189409, "split": "test", "acc": 0.67, "acc_std": 0.04272147469364792, "f1": 0.6239316239316239, "f1_std": 0.05001481661872014, "bacc": 0.6218166383701189, "bacc_std": 0.04634679800420373} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 5, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.04216384707305537, "f1": 0.5324918186068257, "f1_std": 0.04959622890638098, "bacc": 0.5398981324278438, "bacc_std": 0.04406331995021132} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 6, "C": 0.005994842503189409, "split": "test", "acc": 0.61, "acc_std": 0.04257212233375265, "f1": 0.5400400990682863, "f1_std": 0.053588468882064436, "bacc": 0.547962648556876, "bacc_std": 0.0465224406717436} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 7, "C": 0.005994842503189409, "split": "test", "acc": 0.62, "acc_std": 0.045790549243266344, "f1": 0.5824175824175825, "f1_std": 0.05140039161643029, "bacc": 0.5814940577249575, "bacc_std": 0.0493052508620957} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 8, "C": 0.005994842503189409, "split": "test", "acc": 0.63, "acc_std": 0.03796197571254689, "f1": 0.5460679671205987, "f1_std": 0.0493629908011042, "bacc": 0.5589983022071308, "bacc_std": 0.04099909192315337} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 9, "C": 0.005994842503189409, "split": "test", "acc": 0.7, "acc_std": 0.04008483004828634, "f1": 0.6493688639551192, "f1_std": 0.04955815575951578, "bacc": 0.6460101867572157, "bacc_std": 0.04458778549670705} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 10, "C": 0.005994842503189409, "split": "test", "acc": 0.66, "acc_std": 0.04026114752462974, "f1": 0.6026180458158018, "f1_std": 0.04940179498687207, "bacc": 0.6035653650254669, "bacc_std": 0.044058183457382445} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 11, "C": 0.005994842503189409, "split": "test", "acc": 0.71, "acc_std": 0.040367194601557334, "f1": 0.6579785352046232, "f1_std": 0.05249896150907498, "bacc": 0.6540747028862479, "bacc_std": 0.04637017230555599} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 12, "C": 0.005994842503189409, "split": "test", "acc": 0.59, "acc_std": 0.03768212308243791, "f1": 0.48589341692789967, "f1_std": 0.048101291326795116, "bacc": 0.5114601018675722, "bacc_std": 0.03938125257107833} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 13, "C": 0.005994842503189409, "split": "test", "acc": 0.61, "acc_std": 0.04076264957040943, "f1": 0.5481404240528328, "f1_std": 0.0476285931785262, "bacc": 0.5530560271646858, "bacc_std": 0.04269974502653194} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 14, "C": 0.005994842503189409, "split": "test", "acc": 0.63, "acc_std": 0.04125929713410058, "f1": 0.5636277862955537, "f1_std": 0.05111540045144083, "bacc": 0.5691850594227504, "bacc_std": 0.04462291644387462} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 15, "C": 0.000774263682681127, "split": "test", "acc": 0.65, "acc_std": 0.037807935674934706, "f1": 0.561128526645768, "f1_std": 0.051910085885185295, "bacc": 0.5751273344651953, "bacc_std": 0.04183739426858399} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 16, "C": 0.005994842503189409, "split": "test", "acc": 0.62, "acc_std": 0.044401851312754974, "f1": 0.5703301673450927, "f1_std": 0.05105053289393683, "bacc": 0.5713073005093379, "bacc_std": 0.04730951064567415} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 17, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.04346773976180496, "f1": 0.5404411764705883, "f1_std": 0.05029041679046397, "bacc": 0.5449915110356536, "bacc_std": 0.0459161432698831} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 18, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.03870594786334523, "f1": 0.503968253968254, "f1_std": 0.049218584060426285, "bacc": 0.5246179966044142, "bacc_std": 0.04075381041473166} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 19, "C": 0.005994842503189409, "split": "test", "acc": 0.67, "acc_std": 0.04172106901794343, "f1": 0.6239316239316239, "f1_std": 0.049490862385113014, "bacc": 0.6218166383701189, "bacc_std": 0.0453922894938327} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 20, "C": 0.005994842503189409, "split": "test", "acc": 0.64, "acc_std": 0.041204854082983966, "f1": 0.5863970588235294, "f1_std": 0.049194297300836264, "bacc": 0.5874363327674024, "bacc_std": 0.04454594742733914} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 21, "C": 0.005994842503189409, "split": "test", "acc": 0.68, "acc_std": 0.038892523703148905, "f1": 0.6190476190476191, "f1_std": 0.0513468515861432, "bacc": 0.6196943972835314, "bacc_std": 0.04421523557385577} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 22, "C": 0.005994842503189409, "split": "test", "acc": 0.67, "acc_std": 0.03640313722744235, "f1": 0.5862068965517242, "f1_std": 0.052509104049083724, "bacc": 0.5963497453310695, "bacc_std": 0.04164026785669991} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 23, "C": 0.005994842503189409, "split": "test", "acc": 0.63, "acc_std": 0.040382823081107154, "f1": 0.5552350042072365, "f1_std": 0.05091302108366085, "bacc": 0.5640916808149405, "bacc_std": 0.04379936892228645} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 24, "C": 0.046415888336127774, "split": "test", "acc": 0.61, "acc_std": 0.044820401604626435, "f1": 0.5555555555555556, "f1_std": 0.051532919967652704, "bacc": 0.5581494057724957, "bacc_std": 0.04742850911166385} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 25, "C": 0.005994842503189409, "split": "test", "acc": 0.64, "acc_std": 0.03302713429893669, "f1": 0.5322245322245323, "f1_std": 0.049107400382764434, "bacc": 0.5568760611205432, "bacc_std": 0.037012675757886325} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 26, "C": 0.005994842503189409, "split": "test", "acc": 0.71, "acc_std": 0.04014581422763773, "f1": 0.6640018537828757, "f1_std": 0.048821927788670935, "bacc": 0.6591680814940577, "bacc_std": 0.04469626752528675} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 27, "C": 0.005994842503189409, "split": "test", "acc": 0.68, "acc_std": 0.04177493985633012, "f1": 0.6259934548854604, "f1_std": 0.0523387484381796, "bacc": 0.6247877758913413, "bacc_std": 0.04650743637322754} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 28, "C": 0.005994842503189409, "split": "test", "acc": 0.65, "acc_std": 0.037891735246620734, "f1": 0.5706048337627285, "f1_std": 0.050759494956658525, "bacc": 0.580220713073005, "bacc_std": 0.042060410990578515} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 29, "C": 0.046415888336127774, "split": "test", "acc": 0.58, "acc_std": 0.04576898513185539, "f1": 0.525101763907734, "f1_std": 0.05168690284102862, "bacc": 0.5288624787775891, "bacc_std": 0.04808365803548334} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 30, "C": 0.046415888336127774, "split": "test", "acc": 0.58, "acc_std": 0.03787378512903086, "f1": 0.4900437105390966, "f1_std": 0.04684998176587383, "bacc": 0.5084889643463497, "bacc_std": 0.03959443803516942} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 31, "C": 0.000774263682681127, "split": "test", "acc": 0.62, "acc_std": 0.039726439558561005, "f1": 0.5287698412698413, "f1_std": 0.05187777671979085, "bacc": 0.5458404074702886, "bacc_std": 0.04277174115834105} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 32, "C": 0.005994842503189409, "split": "test", "acc": 0.64, "acc_std": 0.03904866707072087, "f1": 0.5628946090335114, "f1_std": 0.05018249214870704, "bacc": 0.5721561969439728, "bacc_std": 0.042352789834437604} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 33, "C": 0.005994842503189409, "split": "test", "acc": 0.66, "acc_std": 0.03916248715288649, "f1": 0.5952380952380952, "f1_std": 0.050275735918763984, "bacc": 0.5984719864176571, "bacc_std": 0.043338738493297606} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 34, "C": 0.005994842503189409, "split": "test", "acc": 0.69, "acc_std": 0.03852336433906051, "f1": 0.627359057579036, "f1_std": 0.0503782615777245, "bacc": 0.6277589134125636, "bacc_std": 0.0434694152844657} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 35, "C": 0.005994842503189409, "split": "test", "acc": 0.59, "acc_std": 0.04437016565215866, "f1": 0.539894512400404, "f1_std": 0.05015394995738416, "bacc": 0.5420203735144312, "bacc_std": 0.04660642171912963} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 36, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.04064846368560563, "f1": 0.5238095238095238, "f1_std": 0.04997210662054391, "bacc": 0.5348047538200339, "bacc_std": 0.04333897052443603} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 37, "C": 0.005994842503189409, "split": "test", "acc": 0.64, "acc_std": 0.03598490794763828, "f1": 0.54337899543379, "f1_std": 0.05071481888525453, "bacc": 0.5619694397283531, "bacc_std": 0.03983552538358852} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 38, "C": 0.005994842503189409, "split": "test", "acc": 0.62, "acc_std": 0.045451244207392175, "f1": 0.5824175824175825, "f1_std": 0.05028122815067808, "bacc": 0.5814940577249575, "bacc_std": 0.04846814035538513} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 39, "C": 0.005994842503189409, "split": "test", "acc": 0.69, "acc_std": 0.040021314321246365, "f1": 0.6408295678368672, "f1_std": 0.048852209110763316, "bacc": 0.6379456706281834, "bacc_std": 0.044317865972324656} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 40, "C": 0.000774263682681127, "split": "test", "acc": 0.62, "acc_std": 0.034462611624773896, "f1": 0.5062370062370062, "f1_std": 0.049078899509542875, "bacc": 0.5356536502546689, "bacc_std": 0.03759435615344903} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 41, "C": 0.005994842503189409, "split": "test", "acc": 0.69, "acc_std": 0.04130051331400131, "f1": 0.6570417081535569, "f1_std": 0.04723612944174216, "bacc": 0.6532258064516129, "bacc_std": 0.04538879738518044} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 42, "C": 0.005994842503189409, "split": "test", "acc": 0.62, "acc_std": 0.038543481939233265, "f1": 0.5287698412698413, "f1_std": 0.05073246341509449, "bacc": 0.5458404074702886, "bacc_std": 0.04172732635665055} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 43, "C": 0.005994842503189409, "split": "test", "acc": 0.61, "acc_std": 0.03915206763377894, "f1": 0.5215311004784688, "f1_std": 0.05065566205039339, "bacc": 0.5377758913412564, "bacc_std": 0.042122551175577574} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 44, "C": 0.005994842503189409, "split": "test", "acc": 0.66, "acc_std": 0.04202130412064813, "f1": 0.609375, "f1_std": 0.050411263485613304, "bacc": 0.6086587436332768, "bacc_std": 0.046004809976274534} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 45, "C": 0.005994842503189409, "split": "test", "acc": 0.72, "acc_std": 0.03439185950192284, "f1": 0.6448503297818367, "f1_std": 0.051925048013964, "bacc": 0.6468590831918506, "bacc_std": 0.041443430180353624} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 46, "C": 0.005994842503189409, "split": "test", "acc": 0.62, "acc_std": 0.04159528338646102, "f1": 0.5634191176470589, "f1_std": 0.05012865900634107, "bacc": 0.566213921901528, "bacc_std": 0.044979817256958564} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 47, "C": 2.782559402207126, "split": "test", "acc": 0.64, "acc_std": 0.049109978619421124, "f1": 0.6179966044142615, "f1_std": 0.052262286104529654, "bacc": 0.6179966044142615, "bacc_std": 0.052196435050811944} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 48, "C": 0.005994842503189409, "split": "test", "acc": 0.72, "acc_std": 0.039771723623700295, "f1": 0.6727442730247779, "f1_std": 0.04999904554435399, "bacc": 0.66723259762309, "bacc_std": 0.04502884291433832} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 49, "C": 0.000774263682681127, "split": "test", "acc": 0.64, "acc_std": 0.02702327885361065, "f1": 0.49181253529079616, "f1_std": 0.047213695123054226, "bacc": 0.5415959252971138, "bacc_std": 0.03097166462176472} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 50, "C": 0.005994842503189409, "split": "test", "acc": 0.63, "acc_std": 0.03993476680788308, "f1": 0.5460679671205987, "f1_std": 0.05162370782909036, "bacc": 0.5589983022071308, "bacc_std": 0.04307981744209661} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 51, "C": 0.046415888336127774, "split": "test", "acc": 0.59, "acc_std": 0.042882513918845754, "f1": 0.5327635327635327, "f1_std": 0.04932912415158319, "bacc": 0.5369269949066213, "bacc_std": 0.04516589259870449} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 52, "C": 0.005994842503189409, "split": "test", "acc": 0.61, "acc_std": 0.041539085208993236, "f1": 0.5481404240528328, "f1_std": 0.04978373547944435, "bacc": 0.5530560271646858, "bacc_std": 0.044727915809697955} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 53, "C": 0.046415888336127774, "split": "test", "acc": 0.59, "acc_std": 0.04521493558548989, "f1": 0.5577607593571352, "f1_std": 0.04825094915138925, "bacc": 0.5573005093378608, "bacc_std": 0.04730410028332581} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 54, "C": 0.000774263682681127, "split": "test", "acc": 0.63, "acc_std": 0.033149696831192905, "f1": 0.5250930560903607, "f1_std": 0.04619096011942813, "bacc": 0.548811544991511, "bacc_std": 0.03595634087392253} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 55, "C": 0.000774263682681127, "split": "test", "acc": 0.62, "acc_std": 0.03732501038178022, "f1": 0.5180111618467782, "f1_std": 0.05013013816959532, "bacc": 0.5407470288624787, "bacc_std": 0.03988770476563775} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 56, "C": 0.046415888336127774, "split": "test", "acc": 0.56, "acc_std": 0.04533953683045295, "f1": 0.4944852941176471, "f1_std": 0.05044309423772322, "bacc": 0.5025466893039049, "bacc_std": 0.046417784845873875} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 57, "C": 0.005994842503189409, "split": "test", "acc": 0.72, "acc_std": 0.03778081523736618, "f1": 0.6727442730247779, "f1_std": 0.047275683435971085, "bacc": 0.66723259762309, "bacc_std": 0.04282910942232186} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 58, "C": 0.005994842503189409, "split": "test", "acc": 0.65, "acc_std": 0.0410247291276859, "f1": 0.5872154735228211, "f1_std": 0.051205500922890584, "bacc": 0.5904074702886248, "bacc_std": 0.04502145278958038} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 59, "C": 0.005994842503189409, "split": "test", "acc": 0.71, "acc_std": 0.039740533463958436, "f1": 0.6579785352046232, "f1_std": 0.05132574233177451, "bacc": 0.6540747028862479, "bacc_std": 0.04541887595952848} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 60, "C": 0.005994842503189409, "split": "test", "acc": 0.62, "acc_std": 0.04121817075028925, "f1": 0.5558672276764843, "f1_std": 0.0503620633793448, "bacc": 0.5611205432937181, "bacc_std": 0.0445764322271891} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 61, "C": 0.005994842503189409, "split": "test", "acc": 0.65, "acc_std": 0.043219921332644747, "f1": 0.6011396011396011, "f1_std": 0.05119197249407092, "bacc": 0.6005942275042444, "bacc_std": 0.0471260167275717} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 62, "C": 0.046415888336127774, "split": "test", "acc": 0.6, "acc_std": 0.03938428112838928, "f1": 0.5238095238095238, "f1_std": 0.04866644159022544, "bacc": 0.5348047538200339, "bacc_std": 0.04176885638997226} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 63, "C": 0.046415888336127774, "split": "test", "acc": 0.6, "acc_std": 0.04564301041780658, "f1": 0.5404411764705883, "f1_std": 0.0517619119809781, "bacc": 0.5449915110356536, "bacc_std": 0.047364601556885995} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 64, "C": 0.005994842503189409, "split": "test", "acc": 0.65, "acc_std": 0.03736271403418119, "f1": 0.561128526645768, "f1_std": 0.05140248315147535, "bacc": 0.5751273344651953, "bacc_std": 0.041612312795271555} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 65, "C": 0.005994842503189409, "split": "test", "acc": 0.59, "acc_std": 0.03916228287523596, "f1": 0.5071523019593701, "f1_std": 0.047450339685443495, "bacc": 0.5216468590831919, "bacc_std": 0.040944251226707896} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 66, "C": 0.005994842503189409, "split": "test", "acc": 0.55, "acc_std": 0.04797634417085153, "f1": 0.4950061721467849, "f1_std": 0.05211928067101346, "bacc": 0.49957555178268254, "bacc_std": 0.04916856987089666} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 67, "C": 1291.5496650148827, "split": "test", "acc": 0.55, "acc_std": 0.04838594837346892, "f1": 0.529239460194581, "f1_std": 0.049542449813314356, "bacc": 0.5301358234295416, "bacc_std": 0.04997580956441811} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 68, "C": 0.000774263682681127, "split": "test", "acc": 0.66, "acc_std": 0.03531396890750174, "f1": 0.5582120582120582, "f1_std": 0.0535784203301751, "bacc": 0.5780984719864176, "bacc_std": 0.04053239220724763} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 69, "C": 0.005994842503189409, "split": "test", "acc": 0.62, "acc_std": 0.04072610465045731, "f1": 0.5634191176470589, "f1_std": 0.048357911783930285, "bacc": 0.566213921901528, "bacc_std": 0.043730514113313496} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 70, "C": 0.005994842503189409, "split": "test", "acc": 0.62, "acc_std": 0.04763391648814949, "f1": 0.5876736111111112, "f1_std": 0.051364323540812766, "bacc": 0.5865874363327674, "bacc_std": 0.05016625484246288} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 71, "C": 0.005994842503189409, "split": "test", "acc": 0.62, "acc_std": 0.03666551513343295, "f1": 0.5062370062370062, "f1_std": 0.04987116846842002, "bacc": 0.5356536502546689, "bacc_std": 0.039321155004028385} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 72, "C": 0.005994842503189409, "split": "test", "acc": 0.64, "acc_std": 0.0402385685630093, "f1": 0.5714285714285714, "f1_std": 0.049453167099405296, "bacc": 0.5772495755517827, "bacc_std": 0.043183324082843456} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 73, "C": 0.005994842503189409, "split": "test", "acc": 0.65, "acc_std": 0.039478252240949065, "f1": 0.5872154735228211, "f1_std": 0.05041277335334933, "bacc": 0.5904074702886248, "bacc_std": 0.04394198604378528} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 74, "C": 0.005994842503189409, "split": "test", "acc": 0.59, "acc_std": 0.04245831367353159, "f1": 0.5249681381068243, "f1_std": 0.050546238173920344, "bacc": 0.5318336162988115, "bacc_std": 0.045291278414387945} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 75, "C": 0.005994842503189409, "split": "test", "acc": 0.68, "acc_std": 0.04007837821070108, "f1": 0.6190476190476191, "f1_std": 0.05235607937022129, "bacc": 0.6196943972835314, "bacc_std": 0.04532605845107308} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 76, "C": 0.005994842503189409, "split": "test", "acc": 0.67, "acc_std": 0.04002636631022106, "f1": 0.6033177064551027, "f1_std": 0.05279232204877245, "bacc": 0.6065365025466893, "bacc_std": 0.044555232474604786} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 77, "C": 0.005994842503189409, "split": "test", "acc": 0.66, "acc_std": 0.03726446028054076, "f1": 0.587178241864983, "f1_std": 0.04976537548148558, "bacc": 0.5933786078098472, "bacc_std": 0.04188161480303919} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 78, "C": 0.005994842503189409, "split": "test", "acc": 0.64, "acc_std": 0.04025027204877006, "f1": 0.5535714285714286, "f1_std": 0.05303531035730347, "bacc": 0.567062818336163, "bacc_std": 0.043632387297455555} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 79, "C": 0.005994842503189409, "split": "test", "acc": 0.66, "acc_std": 0.043270202218154696, "f1": 0.609375, "f1_std": 0.051877727204190933, "bacc": 0.6086587436332768, "bacc_std": 0.04742959015462406} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 80, "C": 0.005994842503189409, "split": "test", "acc": 0.63, "acc_std": 0.04036043111761816, "f1": 0.5636277862955537, "f1_std": 0.050034068674970324, "bacc": 0.5691850594227504, "bacc_std": 0.04374659582168292} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 81, "C": 0.005994842503189409, "split": "test", "acc": 0.71, "acc_std": 0.03671225953275009, "f1": 0.6514004087029691, "f1_std": 0.04827103450956636, "bacc": 0.648981324278438, "bacc_std": 0.04163552090513263} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 82, "C": 0.005994842503189409, "split": "test", "acc": 0.63, "acc_std": 0.04082871048661715, "f1": 0.5552350042072365, "f1_std": 0.052043475908389446, "bacc": 0.5640916808149405, "bacc_std": 0.04447354381589438} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 83, "C": 0.005994842503189409, "split": "test", "acc": 0.55, "acc_std": 0.04488474128253386, "f1": 0.4950061721467849, "f1_std": 0.049063062388602766, "bacc": 0.49957555178268254, "bacc_std": 0.046268002758962444} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 84, "C": 0.005994842503189409, "split": "test", "acc": 0.62, "acc_std": 0.043030221937610316, "f1": 0.5558672276764843, "f1_std": 0.05216053561327231, "bacc": 0.5611205432937181, "bacc_std": 0.046595878313170074} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 85, "C": 0.005994842503189409, "split": "test", "acc": 0.67, "acc_std": 0.039275086250700954, "f1": 0.6108031607500884, "f1_std": 0.04879808525631727, "bacc": 0.6116298811544991, "bacc_std": 0.04315210083848125} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 86, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.033981753927659474, "f1": 0.4802494802494802, "f1_std": 0.04660085655962064, "bacc": 0.5144312393887945, "bacc_std": 0.035912207151033125} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 87, "C": 0.005994842503189409, "split": "test", "acc": 0.64, "acc_std": 0.04130024213004084, "f1": 0.5714285714285714, "f1_std": 0.052669706395553774, "bacc": 0.5772495755517827, "bacc_std": 0.04549350974532303} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 88, "C": 0.046415888336127774, "split": "test", "acc": 0.67, "acc_std": 0.04419579618017985, "f1": 0.6239316239316239, "f1_std": 0.053049282059780815, "bacc": 0.6218166383701189, "bacc_std": 0.04895413199911891} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 89, "C": 0.000774263682681127, "split": "test", "acc": 0.69, "acc_std": 0.027810789273229896, "f1": 0.5689055764149632, "f1_std": 0.05245936438260108, "bacc": 0.5971986417657046, "bacc_std": 0.034939224103906354} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 90, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.04426409380073199, "f1": 0.554367201426025, "f1_std": 0.048642700141135586, "bacc": 0.5551782682512734, "bacc_std": 0.04605045177256103} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 91, "C": 0.005994842503189409, "split": "test", "acc": 0.66, "acc_std": 0.04199055131812394, "f1": 0.6026180458158018, "f1_std": 0.05130547396716849, "bacc": 0.6035653650254669, "bacc_std": 0.045415148621044066} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 92, "C": 0.005994842503189409, "split": "test", "acc": 0.71, "acc_std": 0.03867410503166169, "f1": 0.6579785352046232, "f1_std": 0.048854507627825276, "bacc": 0.6540747028862479, "bacc_std": 0.0435394465880211} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 93, "C": 0.005994842503189409, "split": "test", "acc": 0.61, "acc_std": 0.04276084190003747, "f1": 0.5555555555555556, "f1_std": 0.05006736013708233, "bacc": 0.5581494057724957, "bacc_std": 0.04596553901604652} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 94, "C": 0.005994842503189409, "split": "test", "acc": 0.62, "acc_std": 0.04355685939091569, "f1": 0.5703301673450927, "f1_std": 0.04963352692359453, "bacc": 0.5713073005093379, "bacc_std": 0.04604799234924177} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 95, "C": 0.000774263682681127, "split": "test", "acc": 0.6, "acc_std": 0.02591872682058284, "f1": 0.435347261434218, "f1_std": 0.03782882002620203, "bacc": 0.499151103565365, "bacc_std": 0.02658462348272596} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 96, "C": 0.005994842503189409, "split": "test", "acc": 0.58, "acc_std": 0.03895564657402056, "f1": 0.4900437105390966, "f1_std": 0.04700482821225266, "bacc": 0.5084889643463497, "bacc_std": 0.040280001252098706} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 97, "C": 0.000774263682681127, "split": "test", "acc": 0.66, "acc_std": 0.031188562005966217, "f1": 0.5466666666666666, "f1_std": 0.050217985971710136, "bacc": 0.5730050933786077, "bacc_std": 0.03609859343964925} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 98, "C": 0.046415888336127774, "split": "test", "acc": 0.67, "acc_std": 0.04562281885197362, "f1": 0.6239316239316239, "f1_std": 0.0542794661392299, "bacc": 0.6218166383701189, "bacc_std": 0.050046180575418465} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 99, "C": 0.005994842503189409, "split": "test", "acc": 0.66, "acc_std": 0.04129570922020834, "f1": 0.5952380952380952, "f1_std": 0.051478102868304, "bacc": 0.5984719864176571, "bacc_std": 0.04498487328906596} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 100, "C": 0.005994842503189409, "split": "test", "acc": 0.64, "acc_std": 0.043991503725151294, "f1": 0.5863970588235294, "f1_std": 0.05331733670580259, "bacc": 0.5874363327674024, "bacc_std": 0.04829926095997664} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | ppmi_dx | train | 100 | 12.953 | 129.15 | 0.72593 | 0.048752 | 0.67361 | 0.065176 | 0.6716 | 0.06115 | +| flat_mae | patch | logistic | ppmi_dx | test | 100 | 12.953 | 129.15 | 0.637 | 0.039505 | 0.56903 | 0.0497 | 0.57651 | 0.041909 | + + +done! total time: 0:05:01 diff --git a/data_scaling/n100_2/pretrain/config.yaml b/data_scaling/n100_2/pretrain/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9ce2341254e2b329e94caa70c4d853aa2a1654fe --- /dev/null +++ b/data_scaling/n100_2/pretrain/config.yaml @@ -0,0 +1,109 @@ +name: data_scaling/n100_2/pretrain +notes: data scaling experiment n100_2 (seed=3472) +output_dir: experiments/data_scaling/output/data_scaling/n100_2/pretrain +input_space: flat +patch_size: 16 +num_frames: 16 +t_patch_size: 4 +mask_ratio: 0.9 +pred_mask_ratio: null +masking: tube +masking_kwargs: {} +mask_patch_size: null +model: mae_vit_base +model_kwargs: + decoding: attn + pos_embed: sep + target_norm: null + pca_norm_nc: 2 + t_pred_stride: 2 + no_decode_pos: true + mask_drop_scale: false + pred_edge_pad: 0 + gauss_sigma: null + class_token: true + reg_tokens: 0 + no_embed_class: true + head_init_scale: 0.0 + decoder_depth: 4 + drop_path_rate: 0.0 +datasets: + hcp-train: + type: wds + url: /data/fmri-datasets/pretrain/hcpya-all.flat.wds/hcpya-all-flat-{00800..00899}.tar + clipping: random + clipping_kwargs: + oversample: 4.0 + shuffle: true + buffer_size: 2000 + samples_per_epoch: 200000 + hcp-train-subset: + type: arrow + root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/train + split_range: + - 0 + - 2000 + shuffle: false + hcp-val: + type: arrow + root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/test + split_range: + - 0 + - 2000 + shuffle: false + nsd-val: + type: arrow + root: s3://medarc/fmri-datasets/eval/nsd-cococlip.${input_space}.arrow/testid + split_range: + - 0 + - 2000 + shuffle: false +train_dataset: hcp-train +eval_datasets: +- hcp-train-subset +- hcp-val +- nsd-val +val_dataset: hcp-val +clip_vmax: 3.0 +normalize: frame +tr_scale: null +crop_scale: null +crop_aspect: null +gray_jitter: null +num_workers: 16 +epochs: 100 +batch_size: 32 +accum_iter: 1 +base_lr: 0.001 +min_lr: 0.0 +warmup_epochs: 5 +weight_decay: 0.05 +betas: +- 0.9 +- 0.95 +clip_grad: 1.0 +amp: true +amp_dtype: float16 +ckpt: null +resume: true +auto_resume: true +start_epoch: 0 +max_checkpoints: 20 +checkpoint_period: 5 +plot_period: 5 +device: cuda +presend_cuda: false +seed: 3472 +debug: false +wandb: true +wandb_entity: null +wandb_project: fMRI-foundation-model +rank: 0 +world_size: 1 +gpu: 0 +distributed: true +dist_backend: nccl +in_chans: 1 +img_size: +- 224 +- 560 diff --git a/data_scaling/n100_2/pretrain/log.json b/data_scaling/n100_2/pretrain/log.json new file mode 100644 index 0000000000000000000000000000000000000000..47d129b00f5d39b0644f53cc45ef8879d2219012 --- /dev/null +++ b/data_scaling/n100_2/pretrain/log.json @@ -0,0 +1,100 @@ +{"epoch": 0, "train/lr": 1.2502400076802458e-05, "train/grad": 0.058978364590853455, "train/loss": 0.9930299731063843, "eval/hcp-train-subset/loss": 0.9913841091817425, "eval/hcp-val/loss": 0.9908787675442234, "eval/nsd-val/loss": 0.9918544004040379} +{"epoch": 1, "train/lr": 3.750320010240327e-05, "train/grad": 0.0995599273866415, "train/loss": 0.9861798432350158, "eval/hcp-train-subset/loss": 0.9879004551518348, "eval/hcp-val/loss": 0.9878451045482389, "eval/nsd-val/loss": 0.9887365612291521} +{"epoch": 2, "train/lr": 6.250400012800409e-05, "train/grad": 0.13864124939620742, "train/loss": 0.9817143550777435, "eval/hcp-train-subset/loss": 0.9826328600606611, "eval/hcp-val/loss": 0.9822695466779894, "eval/nsd-val/loss": 0.982202451075277} +{"epoch": 3, "train/lr": 8.75048001536049e-05, "train/grad": 0.21714609180421607, "train/loss": 0.9730535706710816, "eval/hcp-train-subset/loss": 0.9760161724782759, "eval/hcp-val/loss": 0.9755971585550616, "eval/nsd-val/loss": 0.9749795236895161} +{"epoch": 4, "train/lr": 0.00011250559953918529, "train/grad": 0.2306045431544749, "train/loss": 0.9498667807579041, "eval/hcp-train-subset/loss": 0.932417546549151, "eval/hcp-val/loss": 0.9314482058248212, "eval/nsd-val/loss": 0.9086325408950928} +{"epoch": 5, "train/lr": 0.00012498860637884563, "train/grad": 0.1880400450120492, "train/loss": 0.9069649071788788, "eval/hcp-train-subset/loss": 0.8941405473216888, "eval/hcp-val/loss": 0.8926209086372007, "eval/nsd-val/loss": 0.8582378770074537} +{"epoch": 6, "train/lr": 0.0001249202705377922, "train/grad": 0.15570657053279252, "train/loss": 0.8714651505851746, "eval/hcp-train-subset/loss": 0.8790750234357773, "eval/hcp-val/loss": 0.8774362715982622, "eval/nsd-val/loss": 0.8454262431590788} +{"epoch": 7, "train/lr": 0.0001247836790473516, "train/grad": 0.13562883515740898, "train/loss": 0.8501630558395386, "eval/hcp-train-subset/loss": 0.8715145914785324, "eval/hcp-val/loss": 0.8694385972715193, "eval/nsd-val/loss": 0.8400004804134369} +{"epoch": 8, "train/lr": 0.000124578981268311, "train/grad": 0.13275497304867492, "train/loss": 0.835232338552475, "eval/hcp-train-subset/loss": 0.8694759405428364, "eval/hcp-val/loss": 0.8680152902680058, "eval/nsd-val/loss": 0.840494466404761} +{"epoch": 9, "train/lr": 0.00012430640103468907, "train/grad": 0.13390775200780886, "train/loss": 0.8215871210193634, "eval/hcp-train-subset/loss": 0.8693336834830623, "eval/hcp-val/loss": 0.8677343226248219, "eval/nsd-val/loss": 0.8413756778163295} +{"epoch": 10, "train/lr": 0.00012396623640896796, "train/grad": 0.14048435756991523, "train/loss": 0.8024283881282807, "eval/hcp-train-subset/loss": 0.8689949416345165, "eval/hcp-val/loss": 0.8678911770543745, "eval/nsd-val/loss": 0.8441790669195114} +{"epoch": 11, "train/lr": 0.0001235588593561712, "train/grad": 0.14197609380302562, "train/loss": 0.7882439747142792, "eval/hcp-train-subset/loss": 0.8675615902869932, "eval/hcp-val/loss": 0.8661387687729251, "eval/nsd-val/loss": 0.8473090039145562} +{"epoch": 12, "train/lr": 0.00012308471533712604, "train/grad": 0.1412679997180507, "train/loss": 0.7806126366996765, "eval/hcp-train-subset/loss": 0.868413322394894, "eval/hcp-val/loss": 0.8668010648219816, "eval/nsd-val/loss": 0.8429384779545569} +{"epoch": 13, "train/lr": 0.00012254432282135565, "train/grad": 0.14420571521924022, "train/loss": 0.7747139093875886, "eval/hcp-train-subset/loss": 0.8681636923743833, "eval/hcp-val/loss": 0.8666582684363088, "eval/nsd-val/loss": 0.8469036000390207} +{"epoch": 14, "train/lr": 0.00012193827272014171, "train/grad": 0.14960657376202657, "train/loss": 0.7629804454898834, "eval/hcp-train-subset/loss": 0.8710966417866368, "eval/hcp-val/loss": 0.8692076158139014, "eval/nsd-val/loss": 0.850804748073701} +{"epoch": 15, "train/lr": 0.00012126722774037197, "train/grad": 0.15002414989774754, "train/loss": 0.756057783536911, "eval/hcp-train-subset/loss": 0.8689133572962976, "eval/hcp-val/loss": 0.8673050470890538, "eval/nsd-val/loss": 0.8457032336342719} +{"epoch": 16, "train/lr": 0.00012053192165988122, "train/grad": 0.15082470806590154, "train/loss": 0.7494651935863494, "eval/hcp-train-subset/loss": 0.8702033690867885, "eval/hcp-val/loss": 0.8692097827311485, "eval/nsd-val/loss": 0.8525055406555053} +{"epoch": 17, "train/lr": 0.00011973315852507104, "train/grad": 0.15487326757705974, "train/loss": 0.740165676984787, "eval/hcp-train-subset/loss": 0.8716615063528861, "eval/hcp-val/loss": 0.8704537730063161, "eval/nsd-val/loss": 0.8516513061138892} +{"epoch": 18, "train/lr": 0.00011887181177170142, "train/grad": 0.15705069197907032, "train/loss": 0.7399013666534424, "eval/hcp-train-subset/loss": 0.8724129786414485, "eval/hcp-val/loss": 0.8722625595908011, "eval/nsd-val/loss": 0.8548171674051592} +{"epoch": 19, "train/lr": 0.00011794882326980209, "train/grad": 0.1566808602895478, "train/loss": 0.7369363503646851, "eval/hcp-train-subset/loss": 0.871530354984345, "eval/hcp-val/loss": 0.871278868567559, "eval/nsd-val/loss": 0.8530508923915124} +{"epoch": 20, "train/lr": 0.00011696520229374954, "train/grad": 0.15585346522689744, "train/loss": 0.7344265076589584, "eval/hcp-train-subset/loss": 0.8735609044951778, "eval/hcp-val/loss": 0.8714754062314187, "eval/nsd-val/loss": 0.8571754290211585} +{"epoch": 21, "train/lr": 0.00011592202441863837, "train/grad": 0.15678592041261258, "train/loss": 0.7301416373825074, "eval/hcp-train-subset/loss": 0.8734777367884113, "eval/hcp-val/loss": 0.8725026269112864, "eval/nsd-val/loss": 0.8540443710742458} +{"epoch": 22, "train/lr": 0.00011482043034415979, "train/grad": 0.15782542592546547, "train/loss": 0.7235857809925079, "eval/hcp-train-subset/loss": 0.8759710327271493, "eval/hcp-val/loss": 0.8751066404004251, "eval/nsd-val/loss": 0.8584438514324927} +{"epoch": 23, "train/lr": 0.00011366162464726024, "train/grad": 0.1583758990645733, "train/loss": 0.7286690841007233, "eval/hcp-train-subset/loss": 0.8749237993071156, "eval/hcp-val/loss": 0.874075036856436, "eval/nsd-val/loss": 0.8597892830448766} +{"epoch": 24, "train/lr": 0.0001124468744649569, "train/grad": 0.15872533115899184, "train/loss": 0.718452690486908, "eval/hcp-train-subset/loss": 0.8755600798514581, "eval/hcp-val/loss": 0.8747326798977391, "eval/nsd-val/loss": 0.8606308237198861} +{"epoch": 25, "train/lr": 0.0001111775081087387, "train/grad": 0.15779106793913242, "train/loss": 0.7281369743442535, "eval/hcp-train-subset/loss": 0.8761403060728504, "eval/hcp-val/loss": 0.8754614687735035, "eval/nsd-val/loss": 0.8590107089088809} +{"epoch": 26, "train/lr": 0.0001098549136120796, "train/grad": 0.16229131554996312, "train/loss": 0.7180594158935547, "eval/hcp-train-subset/loss": 0.8783993557576211, "eval/hcp-val/loss": 0.8777373404272141, "eval/nsd-val/loss": 0.8629164022784079} +{"epoch": 27, "train/lr": 0.00010848053721264312, "train/grad": 0.16195779644840047, "train/loss": 0.715651161403656, "eval/hcp-train-subset/loss": 0.8769238898831029, "eval/hcp-val/loss": 0.8753851825191129, "eval/nsd-val/loss": 0.8604411848129765} +{"epoch": 28, "train/lr": 0.00010705588177084458, "train/grad": 0.16107142539654654, "train/loss": 0.7160282084560394, "eval/hcp-train-subset/loss": 0.8779125328986875, "eval/hcp-val/loss": 0.8773228218478542, "eval/nsd-val/loss": 0.8563066817099049} +{"epoch": 29, "train/lr": 0.00010558250512649171, "train/grad": 0.1609545538915028, "train/loss": 0.7145852316379547, "eval/hcp-train-subset/loss": 0.8801385866057488, "eval/hcp-val/loss": 0.8798975704177734, "eval/nsd-val/loss": 0.8555769708848768} +{"epoch": 30, "train/lr": 0.00010406201839531515, "train/grad": 0.16413648343881035, "train/loss": 0.7083958812999726, "eval/hcp-train-subset/loss": 0.8831842041784718, "eval/hcp-val/loss": 0.8826290282510942, "eval/nsd-val/loss": 0.865038170929878} +{"epoch": 31, "train/lr": 0.00010249608420723018, "train/grad": 0.16286730810782804, "train/loss": 0.7077531179141998, "eval/hcp-train-subset/loss": 0.881541964507872, "eval/hcp-val/loss": 0.8807804209570731, "eval/nsd-val/loss": 0.8605091821762824} +{"epoch": 32, "train/lr": 0.00010088641488828097, "train/grad": 0.16527805466409987, "train/loss": 0.7078055777788163, "eval/hcp-train-subset/loss": 0.8799822532361553, "eval/hcp-val/loss": 0.8782029267280332, "eval/nsd-val/loss": 0.8589563494728457} +{"epoch": 33, "train/lr": 9.923477058823526e-05, "train/grad": 0.1667053174207797, "train/loss": 0.7031077146625518, "eval/hcp-train-subset/loss": 0.8860252528421341, "eval/hcp-val/loss": 0.8835779716891627, "eval/nsd-val/loss": 0.8646641873544262} +{"epoch": 34, "train/lr": 9.754295735588547e-05, "train/grad": 0.16926264733190555, "train/loss": 0.7032364563846588, "eval/hcp-train-subset/loss": 0.8878523226707212, "eval/hcp-val/loss": 0.8873667255524667, "eval/nsd-val/loss": 0.867237271801118} +{"epoch": 35, "train/lr": 9.581282516416285e-05, "train/grad": 0.17133608624921218, "train/loss": 0.7039361276626587, "eval/hcp-train-subset/loss": 0.888773609553614, "eval/hcp-val/loss": 0.8875824818688054, "eval/nsd-val/loss": 0.8657150268554688} +{"epoch": 36, "train/lr": 9.404626588721676e-05, "train/grad": 0.1691347729275756, "train/loss": 0.703108725605011, "eval/hcp-train-subset/loss": 0.8867921233177185, "eval/hcp-val/loss": 0.8874159186117111, "eval/nsd-val/loss": 0.8645237626567963} +{"epoch": 37, "train/lr": 9.224521123168153e-05, "train/grad": 0.1661000486898422, "train/loss": 0.7005726275730133, "eval/hcp-train-subset/loss": 0.8865634174116196, "eval/hcp-val/loss": 0.8852559933739323, "eval/nsd-val/loss": 0.8679306901270344} +{"epoch": 38, "train/lr": 9.041163062437843e-05, "train/grad": 0.16877997548623877, "train/loss": 0.6964129964447021, "eval/hcp-train-subset/loss": 0.885086323945753, "eval/hcp-val/loss": 0.8850108846541374, "eval/nsd-val/loss": 0.8636455189797186} +{"epoch": 39, "train/lr": 8.85475290587822e-05, "train/grad": 0.17297506143390254, "train/loss": 0.6917873746013642, "eval/hcp-train-subset/loss": 0.8852957669765719, "eval/hcp-val/loss": 0.8843539251435187, "eval/nsd-val/loss": 0.8635145550773989} +{"epoch": 40, "train/lr": 8.665494490258622e-05, "train/grad": 0.17417896986436912, "train/loss": 0.6951419233322144, "eval/hcp-train-subset/loss": 0.8900237583344982, "eval/hcp-val/loss": 0.8885802963087636, "eval/nsd-val/loss": 0.8734667474223722} +{"epoch": 41, "train/lr": 8.473594766877838e-05, "train/grad": 0.17336811137680208, "train/loss": 0.6940248907279968, "eval/hcp-train-subset/loss": 0.886890665177376, "eval/hcp-val/loss": 0.8873982237231347, "eval/nsd-val/loss": 0.8768348914961661} +{"epoch": 42, "train/lr": 8.279263575265999e-05, "train/grad": 0.17045623057482528, "train/loss": 0.6906386895465851, "eval/hcp-train-subset/loss": 0.8897539607940181, "eval/hcp-val/loss": 0.8896968691579757, "eval/nsd-val/loss": 0.8782707318182914} +{"epoch": 43, "train/lr": 8.082713413727944e-05, "train/grad": 0.17262498243429078, "train/loss": 0.6929392415714264, "eval/hcp-train-subset/loss": 0.8901660634625342, "eval/hcp-val/loss": 0.8885776525543582, "eval/nsd-val/loss": 0.8744739332506734} +{"epoch": 44, "train/lr": 7.884159206979602e-05, "train/grad": 0.17611779187890617, "train/loss": 0.6960114043331146, "eval/hcp-train-subset/loss": 0.8925866696142382, "eval/hcp-val/loss": 0.8922337207102007, "eval/nsd-val/loss": 0.874357904157331} +{"epoch": 45, "train/lr": 7.683818071130916e-05, "train/grad": 0.17544444830236758, "train/loss": 0.6903141882228852, "eval/hcp-train-subset/loss": 0.892562823910867, "eval/hcp-val/loss": 0.8916592867143692, "eval/nsd-val/loss": 0.875374186423517} +{"epoch": 46, "train/lr": 7.481909076272522e-05, "train/grad": 0.1742878544970717, "train/loss": 0.6940496349906922, "eval/hcp-train-subset/loss": 0.8932989518488607, "eval/hcp-val/loss": 0.8943770297112004, "eval/nsd-val/loss": 0.883274527326707} +{"epoch": 47, "train/lr": 7.278653006925963e-05, "train/grad": 0.1754826944650517, "train/loss": 0.6915277039670944, "eval/hcp-train-subset/loss": 0.8946365291072477, "eval/hcp-val/loss": 0.8917284934751449, "eval/nsd-val/loss": 0.8796791991879863} +{"epoch": 48, "train/lr": 7.074272120618864e-05, "train/grad": 0.1794046121708332, "train/loss": 0.6906513880443573, "eval/hcp-train-subset/loss": 0.8916664527308557, "eval/hcp-val/loss": 0.8911264692583392, "eval/nsd-val/loss": 0.8758158645322246} +{"epoch": 49, "train/lr": 6.868989904849677e-05, "train/grad": 0.18013309191425883, "train/loss": 0.6846365108013153, "eval/hcp-train-subset/loss": 0.8938165787727602, "eval/hcp-val/loss": 0.8941367345471536, "eval/nsd-val/loss": 0.8782670065279929} +{"epoch": 50, "train/lr": 6.6630308327075e-05, "train/grad": 0.17543716347092547, "train/loss": 0.6877876575088501, "eval/hcp-train-subset/loss": 0.8922323632624841, "eval/hcp-val/loss": 0.8933630816398128, "eval/nsd-val/loss": 0.8780486833664679} +{"epoch": 51, "train/lr": 6.456620117413798e-05, "train/grad": 0.180661566988299, "train/loss": 0.6816006429958343, "eval/hcp-train-subset/loss": 0.8946469080063605, "eval/hcp-val/loss": 0.893725952794475, "eval/nsd-val/loss": 0.885449101847987} +{"epoch": 52, "train/lr": 6.249983466055255e-05, "train/grad": 0.18525125970072567, "train/loss": 0.6869856260299683, "eval/hcp-train-subset/loss": 0.8957781868596231, "eval/hcp-val/loss": 0.8948188799042855, "eval/nsd-val/loss": 0.8855771143590251} +{"epoch": 53, "train/lr": 6.0433468327763305e-05, "train/grad": 0.1842946135766909, "train/loss": 0.6838855184745789, "eval/hcp-train-subset/loss": 0.8967791334275277, "eval/hcp-val/loss": 0.896911185595297, "eval/nsd-val/loss": 0.8879104093197854} +{"epoch": 54, "train/lr": 5.83693617170174e-05, "train/grad": 0.18428664833603, "train/loss": 0.684917045621872, "eval/hcp-train-subset/loss": 0.8977033169038834, "eval/hcp-val/loss": 0.896769413063603, "eval/nsd-val/loss": 0.885810649202716} +{"epoch": 55, "train/lr": 5.6309771898588165e-05, "train/grad": 0.18862667091325586, "train/loss": 0.6787202494621277, "eval/hcp-train-subset/loss": 0.8962667757464994, "eval/hcp-val/loss": 0.8967677085630356, "eval/nsd-val/loss": 0.8850436624019377} +{"epoch": 56, "train/lr": 5.4256951003704155e-05, "train/grad": 0.18924562345559193, "train/loss": 0.6778595862436294, "eval/hcp-train-subset/loss": 0.8969077910146406, "eval/hcp-val/loss": 0.8963496511982333, "eval/nsd-val/loss": 0.891418339744691} +{"epoch": 57, "train/lr": 5.221314376187425e-05, "train/grad": 0.19157293192347177, "train/loss": 0.6759249137020111, "eval/hcp-train-subset/loss": 0.8967329177164263, "eval/hcp-val/loss": 0.8980701450378664, "eval/nsd-val/loss": 0.8860646841987487} +{"epoch": 58, "train/lr": 5.018058504631059e-05, "train/grad": 0.19179969036143163, "train/loss": 0.6820934421920777, "eval/hcp-train-subset/loss": 0.895566338492978, "eval/hcp-val/loss": 0.8955505692189739, "eval/nsd-val/loss": 0.8873067661639182} +{"epoch": 59, "train/lr": 4.816149743012713e-05, "train/grad": 0.1905714564821975, "train/loss": 0.6780764900970458, "eval/hcp-train-subset/loss": 0.8991149481265776, "eval/hcp-val/loss": 0.9004887919272145, "eval/nsd-val/loss": 0.8952788085706772} +{"epoch": 60, "train/lr": 4.615808875598772e-05, "train/grad": 0.1956312608345866, "train/loss": 0.6754466897058486, "eval/hcp-train-subset/loss": 0.9010274833248507, "eval/hcp-val/loss": 0.902209238659951, "eval/nsd-val/loss": 0.8925014311267484} +{"epoch": 61, "train/lr": 4.417254972186445e-05, "train/grad": 0.1977799236106552, "train/loss": 0.6758817591094971, "eval/hcp-train-subset/loss": 0.8997223069590907, "eval/hcp-val/loss": 0.9002241396134899, "eval/nsd-val/loss": 0.894621116499747} +{"epoch": 62, "train/lr": 4.220705148553925e-05, "train/grad": 0.2013413098949193, "train/loss": 0.6714001271152497, "eval/hcp-train-subset/loss": 0.9013205968564556, "eval/hcp-val/loss": 0.9013160457534175, "eval/nsd-val/loss": 0.8950866787664352} +{"epoch": 63, "train/lr": 4.026374329047657e-05, "train/grad": 0.20248051792531085, "train/loss": 0.665819394826889, "eval/hcp-train-subset/loss": 0.9020887171068499, "eval/hcp-val/loss": 0.9019613333286778, "eval/nsd-val/loss": 0.9018644719354568} +{"epoch": 64, "train/lr": 3.834475011565652e-05, "train/grad": 0.20219691026176437, "train/loss": 0.671153001317978, "eval/hcp-train-subset/loss": 0.9007356782113353, "eval/hcp-val/loss": 0.9015966894165162, "eval/nsd-val/loss": 0.8997949506005933} +{"epoch": 65, "train/lr": 3.6452170351940815e-05, "train/grad": 0.20596884415662467, "train/loss": 0.6755003395700455, "eval/hcp-train-subset/loss": 0.9024082056937679, "eval/hcp-val/loss": 0.9031262205493066, "eval/nsd-val/loss": 0.9022274286516251} +{"epoch": 66, "train/lr": 3.458807350751516e-05, "train/grad": 0.20370466488827815, "train/loss": 0.6750241988706589, "eval/hcp-train-subset/loss": 0.9024912368866705, "eval/hcp-val/loss": 0.902176433993924, "eval/nsd-val/loss": 0.9063991654303766} +{"epoch": 67, "train/lr": 3.2754497944910164e-05, "train/grad": 0.21074706379791647, "train/loss": 0.6713280308151245, "eval/hcp-train-subset/loss": 0.9036892267965502, "eval/hcp-val/loss": 0.9049218960346714, "eval/nsd-val/loss": 0.9047655568968865} +{"epoch": 68, "train/lr": 3.0953448652083367e-05, "train/grad": 0.2083858649587944, "train/loss": 0.6754008478164673, "eval/hcp-train-subset/loss": 0.9029688719780214, "eval/hcp-val/loss": 0.9041331593067415, "eval/nsd-val/loss": 0.9033623597314281} +{"epoch": 69, "train/lr": 2.9186895049993948e-05, "train/grad": 0.2164289451827806, "train/loss": 0.6665620513105392, "eval/hcp-train-subset/loss": 0.9037106585118079, "eval/hcp-val/loss": 0.9033464683640388, "eval/nsd-val/loss": 0.9055573536503699} +{"epoch": 70, "train/lr": 2.7456768839068717e-05, "train/grad": 0.21566271720628197, "train/loss": 0.6706394767284394, "eval/hcp-train-subset/loss": 0.9050276519790772, "eval/hcp-val/loss": 0.9057126150977227, "eval/nsd-val/loss": 0.9115870306568761} +{"epoch": 71, "train/lr": 2.5764961886919063e-05, "train/grad": 0.21674290117920972, "train/loss": 0.6690386509943008, "eval/hcp-train-subset/loss": 0.9044102824503376, "eval/hcp-val/loss": 0.903777947348933, "eval/nsd-val/loss": 0.9041506715359227} +{"epoch": 72, "train/lr": 2.411332415960724e-05, "train/grad": 0.21345795814076704, "train/loss": 0.6711630883646011, "eval/hcp-train-subset/loss": 0.9040715809791319, "eval/hcp-val/loss": 0.9047430782548843, "eval/nsd-val/loss": 0.9134010836001365} +{"epoch": 73, "train/lr": 2.2503661698739544e-05, "train/grad": 0.21767681614256348, "train/loss": 0.6672415968418122, "eval/hcp-train-subset/loss": 0.9069805635559943, "eval/hcp-val/loss": 0.9074559778936447, "eval/nsd-val/loss": 0.9132314478197405} +{"epoch": 74, "train/lr": 2.0937734646583902e-05, "train/grad": 0.22517192362308044, "train/loss": 0.6629974451303482, "eval/hcp-train-subset/loss": 0.9062246372622829, "eval/hcp-val/loss": 0.9066441376363078, "eval/nsd-val/loss": 0.9078872809487004} +{"epoch": 75, "train/lr": 1.9417255321381202e-05, "train/grad": 0.22048548673541524, "train/loss": 0.665981637134552, "eval/hcp-train-subset/loss": 0.9073389364827064, "eval/hcp-val/loss": 0.9063464355084204, "eval/nsd-val/loss": 0.9098146971194975} +{"epoch": 76, "train/lr": 1.7943886344950134e-05, "train/grad": 0.22585901538823538, "train/loss": 0.6638450914907456, "eval/hcp-train-subset/loss": 0.9082812859166053, "eval/hcp-val/loss": 0.9085386847296069, "eval/nsd-val/loss": 0.9097700618928478} +{"epoch": 77, "train/lr": 1.651923882463461e-05, "train/grad": 0.2330891453367778, "train/loss": 0.6682168117952347, "eval/hcp-train-subset/loss": 0.9079392725421537, "eval/hcp-val/loss": 0.9090513394724938, "eval/nsd-val/loss": 0.9138169932749963} +{"epoch": 78, "train/lr": 1.5144870591581508e-05, "train/grad": 0.24436815736607512, "train/loss": 0.6684086576080323, "eval/hcp-train-subset/loss": 0.9094149951011904, "eval/hcp-val/loss": 0.9094034664092525, "eval/nsd-val/loss": 0.9194405742229954} +{"epoch": 79, "train/lr": 1.3822284497275662e-05, "train/grad": 0.236284100634038, "train/loss": 0.659176780204773, "eval/hcp-train-subset/loss": 0.9080848213165037, "eval/hcp-val/loss": 0.9091965200439576, "eval/nsd-val/loss": 0.9126650767941629} +{"epoch": 80, "train/lr": 1.2552926770192975e-05, "train/grad": 0.2274717396667562, "train/loss": 0.6624745452928543, "eval/hcp-train-subset/loss": 0.9091685789246713, "eval/hcp-val/loss": 0.9107361032116797, "eval/nsd-val/loss": 0.9098617646002001} +{"epoch": 81, "train/lr": 1.1338185434371453e-05, "train/grad": 0.23391505519688968, "train/loss": 0.6657101407718659, "eval/hcp-train-subset/loss": 0.9101374620391477, "eval/hcp-val/loss": 0.9090137673962501, "eval/nsd-val/loss": 0.9122217768622983} +{"epoch": 82, "train/lr": 1.0179388791627326e-05, "train/grad": 0.23814718828633324, "train/loss": 0.6577391204118729, "eval/hcp-train-subset/loss": 0.9095327882997452, "eval/hcp-val/loss": 0.9091723070990655, "eval/nsd-val/loss": 0.9147122204303741} +{"epoch": 83, "train/lr": 9.07780396907607e-06, "train/grad": 0.2376051642615992, "train/loss": 0.6633330807304383, "eval/hcp-train-subset/loss": 0.9104247785383656, "eval/hcp-val/loss": 0.9094589458357903, "eval/nsd-val/loss": 0.9188831313963859} +{"epoch": 84, "train/lr": 8.034635533547902e-06, "train/grad": 0.24070938636402864, "train/loss": 0.6606400653839112, "eval/hcp-train-subset/loss": 0.910281531272396, "eval/hcp-val/loss": 0.9115254196428484, "eval/nsd-val/loss": 0.9171427699827379} +{"epoch": 85, "train/lr": 7.051024174411275e-06, "train/grad": 0.25532715568478753, "train/loss": 0.6585619413375855, "eval/hcp-train-subset/loss": 0.9101687640913071, "eval/hcp-val/loss": 0.9105878549237405, "eval/nsd-val/loss": 0.916442955693891} +{"epoch": 86, "train/lr": 6.1280454562463606e-06, "train/grad": 0.25734381449495447, "train/loss": 0.6618985905551911, "eval/hcp-train-subset/loss": 0.9109580026518914, "eval/hcp-val/loss": 0.910580022681144, "eval/nsd-val/loss": 0.921444546791815} +{"epoch": 87, "train/lr": 5.266708642730326e-06, "train/grad": 0.24668161571741334, "train/loss": 0.6651595003890991, "eval/hcp-train-subset/loss": 0.9111428779940451, "eval/hcp-val/loss": 0.9109068214893341, "eval/nsd-val/loss": 0.9178418257544118} +{"epoch": 88, "train/lr": 4.467955593022733e-06, "train/grad": 0.24077236551490927, "train/loss": 0.6611389973878861, "eval/hcp-train-subset/loss": 0.9117964352330854, "eval/hcp-val/loss": 0.9123884468309341, "eval/nsd-val/loss": 0.9146611421338974} +{"epoch": 89, "train/lr": 3.732659731856291e-06, "train/grad": 0.24518935046088375, "train/loss": 0.65776183218956, "eval/hcp-train-subset/loss": 0.9131759578181852, "eval/hcp-val/loss": 0.9113064071824474, "eval/nsd-val/loss": 0.9155370968003427} +{"epoch": 90, "train/lr": 3.0616250944596583e-06, "train/grad": 0.24098151419636762, "train/loss": 0.6608706478071212, "eval/hcp-train-subset/loss": 0.9122391198911974, "eval/hcp-val/loss": 0.911472164815472, "eval/nsd-val/loss": 0.9167498244393256} +{"epoch": 91, "train/lr": 2.4555854473568305e-06, "train/grad": 0.24498429488702567, "train/loss": 0.6612438510179519, "eval/hcp-train-subset/loss": 0.9130194773597102, "eval/hcp-val/loss": 0.9122071525742931, "eval/nsd-val/loss": 0.9194852805906727} +{"epoch": 92, "train/lr": 1.915203486004091e-06, "train/grad": 0.248988640875044, "train/loss": 0.6619106415271759, "eval/hcp-train-subset/loss": 0.9130770391033541, "eval/hcp-val/loss": 0.9123224692959939, "eval/nsd-val/loss": 0.9181727103648647} +{"epoch": 93, "train/lr": 1.4410701101423926e-06, "train/grad": 0.24749123091867883, "train/loss": 0.6666936877918244, "eval/hcp-train-subset/loss": 0.9126282238191173, "eval/hcp-val/loss": 0.9130826102149102, "eval/nsd-val/loss": 0.9191458590569035} +{"epoch": 94, "train/lr": 1.0337037776570775e-06, "train/grad": 0.24626917444007346, "train/loss": 0.6627118910598755, "eval/hcp-train-subset/loss": 0.9129584498943821, "eval/hcp-val/loss": 0.9124064166699687, "eval/nsd-val/loss": 0.9172658035832066} +{"epoch": 95, "train/lr": 6.935499376518293e-07, "train/grad": 0.24973602873459996, "train/loss": 0.6594024547052384, "eval/hcp-train-subset/loss": 0.9140850757398913, "eval/hcp-val/loss": 0.9129628808267655, "eval/nsd-val/loss": 0.9182492254241821} +{"epoch": 96, "train/lr": 4.209805433566085e-07, "train/grad": 0.2535508230712598, "train/loss": 0.6628539302062988, "eval/hcp-train-subset/loss": 0.9136599936792927, "eval/hcp-val/loss": 0.9131649771044331, "eval/nsd-val/loss": 0.9192430905757412} +{"epoch": 97, "train/lr": 2.1629364540224422e-07, "train/grad": 0.2533139981706263, "train/loss": 0.6603912528896332, "eval/hcp-train-subset/loss": 0.9135764029718214, "eval/hcp-val/loss": 0.9121302577757067, "eval/nsd-val/loss": 0.9197464095007989} +{"epoch": 98, "train/lr": 7.971306590647406e-08, "train/grad": 0.25274703276454974, "train/loss": 0.6625218396377563, "eval/hcp-train-subset/loss": 0.9131646560084435, "eval/hcp-val/loss": 0.9135911724259776, "eval/nsd-val/loss": 0.919530741630062} +{"epoch": 99, "train/lr": 1.1388153727718725e-08, "train/grad": 0.24676919773242484, "train/loss": 0.6615917481803893, "eval/hcp-train-subset/loss": 0.913109814928424, "eval/hcp-val/loss": 0.9125427065357086, "eval/nsd-val/loss": 0.9198265575593517} diff --git a/data_scaling/n100_2/pretrain/log.txt b/data_scaling/n100_2/pretrain/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..21187d4bfd9801dfab39b3f5e241dfa0cc8451c1 --- /dev/null +++ b/data_scaling/n100_2/pretrain/log.txt @@ -0,0 +1,8234 @@ +pretraining fmri mae +start: 2026-01-17 20:35:42 +cwd: /admin/home/connor/fmri-fm +sha: 4c3ccfb0b63e4f01e9758042b5299530a6d93949, status: has uncommitted changes, branch: dev/clane9 +config: +name: data_scaling/n100_2/pretrain +notes: data scaling experiment n100_2 (seed=3472) +output_dir: experiments/data_scaling/output/data_scaling/n100_2/pretrain +input_space: flat +patch_size: 16 +num_frames: 16 +t_patch_size: 4 +mask_ratio: 0.9 +pred_mask_ratio: null +masking: tube +masking_kwargs: {} +mask_patch_size: null +model: mae_vit_base +model_kwargs: + decoding: attn + pos_embed: sep + target_norm: null + pca_norm_nc: 2 + t_pred_stride: 2 + no_decode_pos: true + mask_drop_scale: false + pred_edge_pad: 0 + gauss_sigma: null + class_token: true + reg_tokens: 0 + no_embed_class: true + head_init_scale: 0.0 + decoder_depth: 4 + drop_path_rate: 0.0 +datasets: + hcp-train: + type: wds + url: /data/fmri-datasets/pretrain/hcpya-all.flat.wds/hcpya-all-flat-{00800..00899}.tar + clipping: random + clipping_kwargs: + oversample: 4.0 + shuffle: true + buffer_size: 2000 + samples_per_epoch: 200000 + hcp-train-subset: + type: arrow + root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/train + split_range: + - 0 + - 2000 + shuffle: false + hcp-val: + type: arrow + root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/test + split_range: + - 0 + - 2000 + shuffle: false + nsd-val: + type: arrow + root: s3://medarc/fmri-datasets/eval/nsd-cococlip.${input_space}.arrow/testid + split_range: + - 0 + - 2000 + shuffle: false +train_dataset: hcp-train +eval_datasets: +- hcp-train-subset +- hcp-val +- nsd-val +val_dataset: hcp-val +clip_vmax: 3.0 +normalize: frame +tr_scale: null +crop_scale: null +crop_aspect: null +gray_jitter: null +num_workers: 16 +epochs: 100 +batch_size: 32 +accum_iter: 1 +base_lr: 0.001 +min_lr: 0.0 +warmup_epochs: 5 +weight_decay: 0.05 +betas: +- 0.9 +- 0.95 +clip_grad: 1.0 +amp: true +amp_dtype: float16 +ckpt: null +resume: true +auto_resume: true +start_epoch: 0 +max_checkpoints: 20 +checkpoint_period: 5 +plot_period: 5 +device: cuda +presend_cuda: false +seed: 3472 +debug: false +wandb: true +wandb_entity: null +wandb_project: fMRI-foundation-model +rank: 0 +world_size: 1 +gpu: 0 +distributed: true +dist_backend: nccl +in_chans: 1 +img_size: +- 224 +- 560 + +train transform: +Compose( + ToTensor() + TemporalCenterCrop(num_frames=16) + Normalize(mode='frame') + Clip(vmax=3.0) + FlatUnmask((224, 560)) +) +val transform: +Compose( + ToTensor() + TemporalCenterCrop(num_frames=16) + Normalize(mode='frame') + Clip(vmax=3.0) + FlatUnmask((224, 560)) +) +mask generator: +TubeMasking( + mask_ratio=0.9 + (patchify): Patchify2D((224, 560), (16, 16), in_chans=1) +) +loading dataset: hcp-train + +type: wds +url: /data/fmri-datasets/pretrain/hcpya-all.flat.wds/hcpya-all-flat-{00800..00899}.tar +clipping: random +clipping_kwargs: + oversample: 4.0 +shuffle: true +buffer_size: 2000 +samples_per_epoch: 200000 + +loading dataset: hcp-train-subset + +type: arrow +root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/train +split_range: +- 0 +- 2000 +shuffle: false + +split indices: [8543, 6917, 6772, 3955, 6165, 1554, 1082, 5811, 6919, 3150] +loading dataset: hcp-val + +type: arrow +root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/test +split_range: +- 0 +- 2000 +shuffle: false + +split indices: [1075, 1189, 738, 1350, 965, 1964, 1367, 1183, 1619, 1407] +loading dataset: nsd-val + +type: arrow +root: s3://medarc/fmri-datasets/eval/nsd-cococlip.${input_space}.arrow/testid +split_range: +- 0 +- 2000 +shuffle: false + +split indices: [1493, 4276, 245, 3092, 3905, 1862, 2362, 4411, 1138, 2824] +model: +MaskedAutoencoderViT( + decoding=attn, t_pred_stride=2, pred_edge_pad=0, no_decode_pos=True + (encoder): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) + (pred_patchify): StridedPatchify3D((16, 224, 560), (2, 16, 16), in_chans=1, t_stride=2) + (decoder): MaskedDecoder( + cross_decode=False, class_token=True, no_embed_class=True + (pos_embed): SeparablePosEmbed(512, (4, 14, 35)) + (proj): Linear(in_features=768, out_features=512, bias=True) + (blocks): ModuleList( + (0-3): 4 x Block( + (norm1): LayerNorm((512,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=16 + (q): Linear(in_features=512, out_features=512, bias=True) + (k): Linear(in_features=512, out_features=512, bias=True) + (v): Linear(in_features=512, out_features=512, bias=True) + (proj): Linear(in_features=512, out_features=512, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((512,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=512, out_features=2048, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=2048, out_features=512, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True) + (head): Linear(in_features=512, out_features=512, bias=True) + ) +) +num params: 99.7M +total batch size: 32 = 32 bs per gpu x 1 accum x 1 gpus +lr: 1.25e-04 = 1.00e-03 x 32 / 256 +full schedule: epochs = 100 (steps = 625000) +warmup: epochs = 5 (steps = 31250) +start training for 100 epochs +Train: [0] [ 0/6250] eta: 12:05:43 lr: 0.000000 grad: 0.0132 (0.0132) loss: 0.9955 (0.9955) time: 6.9670 data: 5.9473 max mem: 8570 +Train: [0] [ 100/6250] eta: 0:19:10 lr: 0.000000 grad: 0.0142 (0.0163) loss: 0.9958 (0.9958) time: 0.1399 data: 0.0667 max mem: 9377 +Train: [0] [ 200/6250] eta: 0:16:30 lr: 0.000001 grad: 0.0132 (0.0153) loss: 0.9956 (0.9959) time: 0.1425 data: 0.0563 max mem: 9377 +Train: [0] [ 300/6250] eta: 0:16:16 lr: 0.000001 grad: 0.0125 (0.0147) loss: 0.9958 (0.9959) time: 0.2124 data: 0.1178 max mem: 9377 +Train: [0] [ 400/6250] eta: 0:15:45 lr: 0.000002 grad: 0.0128 (0.0142) loss: 0.9958 (0.9959) time: 0.1678 data: 0.0843 max mem: 9377 +Train: [0] [ 500/6250] eta: 0:15:32 lr: 0.000002 grad: 0.0126 (0.0140) loss: 0.9960 (0.9959) time: 0.1605 data: 0.0685 max mem: 9377 +Train: [0] [ 600/6250] eta: 0:15:20 lr: 0.000002 grad: 0.0128 (0.0139) loss: 0.9962 (0.9960) time: 0.1998 data: 0.1068 max mem: 9377 +Train: [0] [ 700/6250] eta: 0:14:55 lr: 0.000003 grad: 0.0127 (0.0137) loss: 0.9959 (0.9960) time: 0.1682 data: 0.0658 max mem: 9377 +Train: [0] [ 800/6250] eta: 0:14:33 lr: 0.000003 grad: 0.0130 (0.0136) loss: 0.9960 (0.9960) time: 0.1461 data: 0.0484 max mem: 9377 +Train: [0] [ 900/6250] eta: 0:14:13 lr: 0.000004 grad: 0.0131 (0.0135) loss: 0.9954 (0.9960) time: 0.1728 data: 0.0740 max mem: 9377 +Train: [0] [1000/6250] eta: 0:13:51 lr: 0.000004 grad: 0.0132 (0.0135) loss: 0.9958 (0.9960) time: 0.1302 data: 0.0438 max mem: 9377 +Train: [0] [1100/6250] eta: 0:13:29 lr: 0.000004 grad: 0.0136 (0.0135) loss: 0.9962 (0.9960) time: 0.1279 data: 0.0200 max mem: 9377 +Train: [0] [1200/6250] eta: 0:13:11 lr: 0.000005 grad: 0.0159 (0.0136) loss: 0.9958 (0.9960) time: 0.1487 data: 0.0635 max mem: 9377 +Train: [0] [1300/6250] eta: 0:12:49 lr: 0.000005 grad: 0.0158 (0.0138) loss: 0.9956 (0.9959) time: 0.1280 data: 0.0340 max mem: 9377 +Train: [0] [1400/6250] eta: 0:12:30 lr: 0.000006 grad: 0.0194 (0.0143) loss: 0.9956 (0.9959) time: 0.1349 data: 0.0478 max mem: 9377 +Train: [0] [1500/6250] eta: 0:12:09 lr: 0.000006 grad: 0.0146 (0.0147) loss: 0.9959 (0.9959) time: 0.1390 data: 0.0481 max mem: 9377 +Train: [0] [1600/6250] eta: 0:11:51 lr: 0.000006 grad: 0.0224 (0.0153) loss: 0.9958 (0.9959) time: 0.1516 data: 0.0598 max mem: 9377 +Train: [0] [1700/6250] eta: 0:11:34 lr: 0.000007 grad: 0.0261 (0.0160) loss: 0.9954 (0.9959) time: 0.1525 data: 0.0594 max mem: 9377 +Train: [0] [1800/6250] eta: 0:11:16 lr: 0.000007 grad: 0.0387 (0.0170) loss: 0.9951 (0.9959) time: 0.1479 data: 0.0585 max mem: 9377 +Train: [0] [1900/6250] eta: 0:11:00 lr: 0.000008 grad: 0.0475 (0.0182) loss: 0.9955 (0.9958) time: 0.1492 data: 0.0510 max mem: 9377 +Train: [0] [2000/6250] eta: 0:10:42 lr: 0.000008 grad: 0.0427 (0.0197) loss: 0.9946 (0.9958) time: 0.1341 data: 0.0414 max mem: 9377 +Train: [0] [2100/6250] eta: 0:10:25 lr: 0.000008 grad: 0.0420 (0.0209) loss: 0.9946 (0.9958) time: 0.1253 data: 0.0274 max mem: 9377 +Train: [0] [2200/6250] eta: 0:10:11 lr: 0.000009 grad: 0.0473 (0.0221) loss: 0.9949 (0.9957) time: 0.1679 data: 0.0759 max mem: 9377 +Train: [0] [2300/6250] eta: 0:09:54 lr: 0.000009 grad: 0.0488 (0.0234) loss: 0.9941 (0.9957) time: 0.1443 data: 0.0472 max mem: 9377 +Train: [0] [2400/6250] eta: 0:09:38 lr: 0.000010 grad: 0.0485 (0.0247) loss: 0.9940 (0.9956) time: 0.1354 data: 0.0458 max mem: 9377 +Train: [0] [2500/6250] eta: 0:09:22 lr: 0.000010 grad: 0.0426 (0.0257) loss: 0.9948 (0.9956) time: 0.1350 data: 0.0420 max mem: 9377 +Train: [0] [2600/6250] eta: 0:09:06 lr: 0.000010 grad: 0.0456 (0.0265) loss: 0.9935 (0.9955) time: 0.1445 data: 0.0540 max mem: 9377 +Train: [0] [2700/6250] eta: 0:08:50 lr: 0.000011 grad: 0.0506 (0.0275) loss: 0.9944 (0.9955) time: 0.1280 data: 0.0245 max mem: 9377 +Train: [0] [2800/6250] eta: 0:08:36 lr: 0.000011 grad: 0.0556 (0.0286) loss: 0.9941 (0.9954) time: 0.1581 data: 0.0627 max mem: 9377 +Train: [0] [2900/6250] eta: 0:08:20 lr: 0.000012 grad: 0.0504 (0.0296) loss: 0.9931 (0.9954) time: 0.1335 data: 0.0424 max mem: 9377 +Train: [0] [3000/6250] eta: 0:08:05 lr: 0.000012 grad: 0.0496 (0.0304) loss: 0.9938 (0.9953) time: 0.1589 data: 0.0708 max mem: 9377 +Train: [0] [3100/6250] eta: 0:07:49 lr: 0.000012 grad: 0.0671 (0.0313) loss: 0.9938 (0.9953) time: 0.1412 data: 0.0414 max mem: 9377 +Train: [0] [3200/6250] eta: 0:07:33 lr: 0.000013 grad: 0.0513 (0.0321) loss: 0.9942 (0.9952) time: 0.1293 data: 0.0320 max mem: 9377 +Train: [0] [3300/6250] eta: 0:07:18 lr: 0.000013 grad: 0.0617 (0.0331) loss: 0.9932 (0.9952) time: 0.1360 data: 0.0474 max mem: 9377 +Train: [0] [3400/6250] eta: 0:07:03 lr: 0.000014 grad: 0.0610 (0.0341) loss: 0.9921 (0.9951) time: 0.1612 data: 0.0730 max mem: 9377 +Train: [0] [3500/6250] eta: 0:06:48 lr: 0.000014 grad: 0.0739 (0.0351) loss: 0.9923 (0.9951) time: 0.1250 data: 0.0378 max mem: 9377 +Train: [0] [3600/6250] eta: 0:06:33 lr: 0.000014 grad: 0.0765 (0.0361) loss: 0.9914 (0.9950) time: 0.1545 data: 0.0634 max mem: 9377 +Train: [0] [3700/6250] eta: 0:06:19 lr: 0.000015 grad: 0.0697 (0.0370) loss: 0.9921 (0.9949) time: 0.1634 data: 0.0665 max mem: 9377 +Train: [0] [3800/6250] eta: 0:06:04 lr: 0.000015 grad: 0.0769 (0.0379) loss: 0.9924 (0.9949) time: 0.1334 data: 0.0408 max mem: 9377 +Train: [0] [3900/6250] eta: 0:05:49 lr: 0.000016 grad: 0.0813 (0.0390) loss: 0.9900 (0.9948) time: 0.1265 data: 0.0357 max mem: 9377 +Train: [0] [4000/6250] eta: 0:05:35 lr: 0.000016 grad: 0.0668 (0.0399) loss: 0.9912 (0.9947) time: 0.1717 data: 0.0808 max mem: 9377 +Train: [0] [4100/6250] eta: 0:05:20 lr: 0.000016 grad: 0.0735 (0.0408) loss: 0.9917 (0.9946) time: 0.1748 data: 0.0831 max mem: 9377 +Train: [0] [4200/6250] eta: 0:05:05 lr: 0.000017 grad: 0.0836 (0.0418) loss: 0.9913 (0.9946) time: 0.1558 data: 0.0718 max mem: 9377 +Train: [0] [4300/6250] eta: 0:04:50 lr: 0.000017 grad: 0.0828 (0.0427) loss: 0.9915 (0.9945) time: 0.1629 data: 0.0780 max mem: 9377 +Train: [0] [4400/6250] eta: 0:04:36 lr: 0.000018 grad: 0.0767 (0.0436) loss: 0.9916 (0.9944) time: 0.1541 data: 0.0742 max mem: 9377 +Train: [0] [4500/6250] eta: 0:04:21 lr: 0.000018 grad: 0.0880 (0.0446) loss: 0.9909 (0.9944) time: 0.1566 data: 0.0673 max mem: 9377 +Train: [0] [4600/6250] eta: 0:04:07 lr: 0.000018 grad: 0.0979 (0.0457) loss: 0.9900 (0.9943) time: 0.1658 data: 0.0888 max mem: 9377 +Train: [0] [4700/6250] eta: 0:03:52 lr: 0.000019 grad: 0.0887 (0.0467) loss: 0.9880 (0.9942) time: 0.1517 data: 0.0637 max mem: 9377 +Train: [0] [4800/6250] eta: 0:03:37 lr: 0.000019 grad: 0.0944 (0.0478) loss: 0.9909 (0.9941) time: 0.1738 data: 0.0890 max mem: 9377 +Train: [0] [4900/6250] eta: 0:03:22 lr: 0.000020 grad: 0.0900 (0.0487) loss: 0.9902 (0.9940) time: 0.1597 data: 0.0663 max mem: 9377 +Train: [0] [5000/6250] eta: 0:03:08 lr: 0.000020 grad: 0.0825 (0.0497) loss: 0.9894 (0.9940) time: 0.1879 data: 0.1036 max mem: 9377 +Train: [0] [5100/6250] eta: 0:02:53 lr: 0.000020 grad: 0.0868 (0.0505) loss: 0.9904 (0.9939) time: 0.1540 data: 0.0629 max mem: 9377 +Train: [0] [5200/6250] eta: 0:02:38 lr: 0.000021 grad: 0.0742 (0.0514) loss: 0.9908 (0.9938) time: 0.1599 data: 0.0687 max mem: 9377 +Train: [0] [5300/6250] eta: 0:02:23 lr: 0.000021 grad: 0.0766 (0.0521) loss: 0.9901 (0.9937) time: 0.1413 data: 0.0613 max mem: 9377 +Train: [0] [5400/6250] eta: 0:02:08 lr: 0.000022 grad: 0.0923 (0.0530) loss: 0.9899 (0.9937) time: 0.1461 data: 0.0539 max mem: 9377 +Train: [0] [5500/6250] eta: 0:01:53 lr: 0.000022 grad: 0.1015 (0.0539) loss: 0.9886 (0.9936) time: 0.1857 data: 0.1058 max mem: 9377 +Train: [0] [5600/6250] eta: 0:01:38 lr: 0.000022 grad: 0.0824 (0.0546) loss: 0.9888 (0.9935) time: 0.1465 data: 0.0556 max mem: 9377 +Train: [0] [5700/6250] eta: 0:01:23 lr: 0.000023 grad: 0.0833 (0.0553) loss: 0.9903 (0.9934) time: 0.1469 data: 0.0593 max mem: 9377 +Train: [0] [5800/6250] eta: 0:01:08 lr: 0.000023 grad: 0.0782 (0.0560) loss: 0.9898 (0.9934) time: 0.1838 data: 0.0998 max mem: 9377 +Train: [0] [5900/6250] eta: 0:00:52 lr: 0.000024 grad: 0.0916 (0.0567) loss: 0.9885 (0.9933) time: 0.1511 data: 0.0576 max mem: 9377 +Train: [0] [6000/6250] eta: 0:00:37 lr: 0.000024 grad: 0.0886 (0.0574) loss: 0.9890 (0.9932) time: 0.1626 data: 0.0775 max mem: 9377 +Train: [0] [6100/6250] eta: 0:00:22 lr: 0.000024 grad: 0.0830 (0.0580) loss: 0.9885 (0.9931) time: 0.1889 data: 0.1146 max mem: 9377 +Train: [0] [6200/6250] eta: 0:00:07 lr: 0.000025 grad: 0.0917 (0.0586) loss: 0.9881 (0.9931) time: 0.1507 data: 0.0671 max mem: 9377 +Train: [0] [6249/6250] eta: 0:00:00 lr: 0.000025 grad: 0.0865 (0.0590) loss: 0.9868 (0.9930) time: 0.1703 data: 0.0905 max mem: 9377 +Train: [0] Total time: 0:15:55 (0.1529 s / it) +Averaged stats: lr: 0.000025 grad: 0.0865 (0.0590) loss: 0.9868 (0.9930) +Eval (hcp-train-subset): [0] [ 0/62] eta: 0:03:46 loss: 0.9850 (0.9850) time: 3.6589 data: 3.5676 max mem: 9377 +Eval (hcp-train-subset): [0] [61/62] eta: 0:00:00 loss: 0.9928 (0.9914) time: 0.1194 data: 0.0893 max mem: 9377 +Eval (hcp-train-subset): [0] Total time: 0:00:13 (0.2186 s / it) +Averaged stats (hcp-train-subset): loss: 0.9928 (0.9914) +Eval (hcp-val): [0] [ 0/62] eta: 0:04:49 loss: 0.9879 (0.9879) time: 4.6640 data: 4.6355 max mem: 9377 +Eval (hcp-val): [0] [61/62] eta: 0:00:00 loss: 0.9904 (0.9909) time: 0.1439 data: 0.1192 max mem: 9377 +Eval (hcp-val): [0] Total time: 0:00:13 (0.2189 s / it) +Averaged stats (hcp-val): loss: 0.9904 (0.9909) +Eval (nsd-val): [0] [ 0/62] eta: 0:04:39 loss: 0.9950 (0.9950) time: 4.5075 data: 4.4423 max mem: 9377 +Eval (nsd-val): [0] [61/62] eta: 0:00:00 loss: 0.9928 (0.9919) time: 0.1395 data: 0.1141 max mem: 9377 +Eval (nsd-val): [0] Total time: 0:00:14 (0.2288 s / it) +Averaged stats (nsd-val): loss: 0.9928 (0.9919) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-best.pth +Train: [1] [ 0/6250] eta: 10:53:12 lr: 0.000025 grad: 0.0618 (0.0618) loss: 0.9881 (0.9881) time: 6.2708 data: 6.1254 max mem: 9377 +Train: [1] [ 100/6250] eta: 0:24:13 lr: 0.000025 grad: 0.0723 (0.0916) loss: 0.9909 (0.9899) time: 0.1376 data: 0.0553 max mem: 9377 +Train: [1] [ 200/6250] eta: 0:22:38 lr: 0.000026 grad: 0.0742 (0.0899) loss: 0.9897 (0.9896) time: 0.1968 data: 0.0926 max mem: 9377 +Train: [1] [ 300/6250] eta: 0:21:40 lr: 0.000026 grad: 0.0875 (0.0903) loss: 0.9888 (0.9893) time: 0.1705 data: 0.0494 max mem: 9377 +Train: [1] [ 400/6250] eta: 0:21:37 lr: 0.000027 grad: 0.0738 (0.0887) loss: 0.9886 (0.9893) time: 0.2504 data: 0.1276 max mem: 9377 +Train: [1] [ 500/6250] eta: 0:20:39 lr: 0.000027 grad: 0.0870 (0.0886) loss: 0.9885 (0.9892) time: 0.1950 data: 0.1034 max mem: 9377 +Train: [1] [ 600/6250] eta: 0:19:55 lr: 0.000027 grad: 0.0783 (0.0890) loss: 0.9889 (0.9891) time: 0.1771 data: 0.0766 max mem: 9377 +Train: [1] [ 700/6250] eta: 0:19:11 lr: 0.000028 grad: 0.0896 (0.0896) loss: 0.9882 (0.9890) time: 0.1654 data: 0.0589 max mem: 9377 +Train: [1] [ 800/6250] eta: 0:18:37 lr: 0.000028 grad: 0.0934 (0.0902) loss: 0.9868 (0.9888) time: 0.1872 data: 0.0865 max mem: 9377 +Train: [1] [ 900/6250] eta: 0:18:04 lr: 0.000029 grad: 0.0893 (0.0903) loss: 0.9891 (0.9888) time: 0.1903 data: 0.0952 max mem: 9377 +Train: [1] [1000/6250] eta: 0:17:27 lr: 0.000029 grad: 0.0902 (0.0906) loss: 0.9881 (0.9887) time: 0.1608 data: 0.0580 max mem: 9377 +Train: [1] [1100/6250] eta: 0:16:54 lr: 0.000029 grad: 0.0830 (0.0909) loss: 0.9871 (0.9886) time: 0.1562 data: 0.0633 max mem: 9377 +Train: [1] [1200/6250] eta: 0:16:21 lr: 0.000030 grad: 0.0866 (0.0912) loss: 0.9862 (0.9886) time: 0.1552 data: 0.0607 max mem: 9377 +Train: [1] [1300/6250] eta: 0:15:56 lr: 0.000030 grad: 0.0912 (0.0920) loss: 0.9868 (0.9884) time: 0.1751 data: 0.0803 max mem: 9377 +Train: [1] [1400/6250] eta: 0:15:28 lr: 0.000031 grad: 0.0829 (0.0920) loss: 0.9895 (0.9884) time: 0.1827 data: 0.0977 max mem: 9377 +Train: [1] [1500/6250] eta: 0:14:59 lr: 0.000031 grad: 0.0961 (0.0925) loss: 0.9871 (0.9883) time: 0.1616 data: 0.0765 max mem: 9377 +Train: [1] [1600/6250] eta: 0:14:36 lr: 0.000031 grad: 0.1014 (0.0931) loss: 0.9873 (0.9882) time: 0.1850 data: 0.1026 max mem: 9377 +Train: [1] [1700/6250] eta: 0:14:12 lr: 0.000032 grad: 0.1001 (0.0931) loss: 0.9871 (0.9882) time: 0.1454 data: 0.0579 max mem: 9377 +Train: [1] [1800/6250] eta: 0:13:48 lr: 0.000032 grad: 0.0890 (0.0933) loss: 0.9876 (0.9881) time: 0.1711 data: 0.0732 max mem: 9377 +Train: [1] [1900/6250] eta: 0:13:26 lr: 0.000033 grad: 0.1003 (0.0935) loss: 0.9871 (0.9881) time: 0.1781 data: 0.0935 max mem: 9377 +Train: [1] [2000/6250] eta: 0:13:03 lr: 0.000033 grad: 0.0858 (0.0939) loss: 0.9863 (0.9880) time: 0.1744 data: 0.0765 max mem: 9377 +Train: [1] [2100/6250] eta: 0:12:40 lr: 0.000033 grad: 0.0974 (0.0941) loss: 0.9872 (0.9879) time: 0.1669 data: 0.0806 max mem: 9377 +Train: [1] [2200/6250] eta: 0:12:19 lr: 0.000034 grad: 0.0986 (0.0945) loss: 0.9864 (0.9878) time: 0.1616 data: 0.0604 max mem: 9377 +Train: [1] [2300/6250] eta: 0:11:58 lr: 0.000034 grad: 0.0885 (0.0949) loss: 0.9875 (0.9877) time: 0.1528 data: 0.0656 max mem: 9377 +Train: [1] [2400/6250] eta: 0:11:37 lr: 0.000035 grad: 0.0949 (0.0952) loss: 0.9870 (0.9876) time: 0.1684 data: 0.0802 max mem: 9377 +Train: [1] [2500/6250] eta: 0:11:15 lr: 0.000035 grad: 0.1041 (0.0958) loss: 0.9856 (0.9875) time: 0.1421 data: 0.0509 max mem: 9377 +Train: [1] [2600/6250] eta: 0:10:56 lr: 0.000035 grad: 0.1052 (0.0960) loss: 0.9861 (0.9875) time: 0.1725 data: 0.0828 max mem: 9377 +Train: [1] [2700/6250] eta: 0:10:37 lr: 0.000036 grad: 0.1009 (0.0963) loss: 0.9832 (0.9874) time: 0.1694 data: 0.0813 max mem: 9377 +Train: [1] [2800/6250] eta: 0:10:18 lr: 0.000036 grad: 0.0998 (0.0964) loss: 0.9868 (0.9874) time: 0.1726 data: 0.0950 max mem: 9377 +Train: [1] [2900/6250] eta: 0:09:59 lr: 0.000037 grad: 0.1064 (0.0967) loss: 0.9866 (0.9873) time: 0.1571 data: 0.0619 max mem: 9377 +Train: [1] [3000/6250] eta: 0:09:39 lr: 0.000037 grad: 0.0981 (0.0970) loss: 0.9869 (0.9872) time: 0.1464 data: 0.0634 max mem: 9377 +Train: [1] [3100/6250] eta: 0:09:20 lr: 0.000037 grad: 0.0977 (0.0971) loss: 0.9872 (0.9872) time: 0.1757 data: 0.0834 max mem: 9377 +Train: [1] [3200/6250] eta: 0:09:02 lr: 0.000038 grad: 0.0978 (0.0972) loss: 0.9860 (0.9871) time: 0.1762 data: 0.0906 max mem: 9377 +Train: [1] [3300/6250] eta: 0:08:43 lr: 0.000038 grad: 0.0891 (0.0973) loss: 0.9864 (0.9871) time: 0.1675 data: 0.0824 max mem: 9377 +Train: [1] [3400/6250] eta: 0:08:24 lr: 0.000039 grad: 0.0944 (0.0973) loss: 0.9849 (0.9870) time: 0.1422 data: 0.0503 max mem: 9377 +Train: [1] [3500/6250] eta: 0:08:05 lr: 0.000039 grad: 0.1049 (0.0975) loss: 0.9851 (0.9870) time: 0.1582 data: 0.0619 max mem: 9377 +Train: [1] [3600/6250] eta: 0:07:46 lr: 0.000039 grad: 0.1057 (0.0975) loss: 0.9876 (0.9870) time: 0.1961 data: 0.1098 max mem: 9377 +Train: [1] [3700/6250] eta: 0:07:28 lr: 0.000040 grad: 0.0893 (0.0975) loss: 0.9857 (0.9870) time: 0.1661 data: 0.0843 max mem: 9377 +Train: [1] [3800/6250] eta: 0:07:09 lr: 0.000040 grad: 0.1030 (0.0975) loss: 0.9862 (0.9870) time: 0.1618 data: 0.0708 max mem: 9377 +Train: [1] [3900/6250] eta: 0:06:51 lr: 0.000041 grad: 0.0870 (0.0974) loss: 0.9863 (0.9870) time: 0.1622 data: 0.0698 max mem: 9377 +Train: [1] [4000/6250] eta: 0:06:32 lr: 0.000041 grad: 0.0887 (0.0974) loss: 0.9886 (0.9869) time: 0.1330 data: 0.0522 max mem: 9377 +Train: [1] [4100/6250] eta: 0:06:14 lr: 0.000041 grad: 0.0860 (0.0974) loss: 0.9876 (0.9869) time: 0.1868 data: 0.0992 max mem: 9377 +Train: [1] [4200/6250] eta: 0:05:56 lr: 0.000042 grad: 0.0828 (0.0972) loss: 0.9868 (0.9869) time: 0.1476 data: 0.0552 max mem: 9377 +Train: [1] [4300/6250] eta: 0:05:39 lr: 0.000042 grad: 0.0898 (0.0973) loss: 0.9853 (0.9869) time: 0.1502 data: 0.0574 max mem: 9377 +Train: [1] [4400/6250] eta: 0:05:21 lr: 0.000043 grad: 0.0978 (0.0974) loss: 0.9856 (0.9869) time: 0.1592 data: 0.0782 max mem: 9377 +Train: [1] [4500/6250] eta: 0:05:03 lr: 0.000043 grad: 0.0958 (0.0975) loss: 0.9869 (0.9868) time: 0.1492 data: 0.0596 max mem: 9377 +Train: [1] [4600/6250] eta: 0:04:45 lr: 0.000043 grad: 0.0953 (0.0977) loss: 0.9860 (0.9868) time: 0.1615 data: 0.0763 max mem: 9377 +Train: [1] [4700/6250] eta: 0:04:27 lr: 0.000044 grad: 0.1086 (0.0979) loss: 0.9857 (0.9868) time: 0.1849 data: 0.0950 max mem: 9377 +Train: [1] [4800/6250] eta: 0:04:10 lr: 0.000044 grad: 0.0946 (0.0981) loss: 0.9852 (0.9867) time: 0.1588 data: 0.0777 max mem: 9377 +Train: [1] [4900/6250] eta: 0:03:52 lr: 0.000045 grad: 0.0817 (0.0981) loss: 0.9879 (0.9867) time: 0.1491 data: 0.0538 max mem: 9377 +Train: [1] [5000/6250] eta: 0:03:34 lr: 0.000045 grad: 0.0954 (0.0982) loss: 0.9854 (0.9867) time: 0.1499 data: 0.0639 max mem: 9377 +Train: [1] [5100/6250] eta: 0:03:17 lr: 0.000045 grad: 0.0995 (0.0983) loss: 0.9862 (0.9867) time: 0.1428 data: 0.0586 max mem: 9377 +Train: [1] [5200/6250] eta: 0:02:59 lr: 0.000046 grad: 0.0888 (0.0983) loss: 0.9884 (0.9866) time: 0.1599 data: 0.0720 max mem: 9377 +Train: [1] [5300/6250] eta: 0:02:42 lr: 0.000046 grad: 0.1071 (0.0984) loss: 0.9860 (0.9866) time: 0.1493 data: 0.0626 max mem: 9377 +Train: [1] [5400/6250] eta: 0:02:24 lr: 0.000047 grad: 0.0975 (0.0984) loss: 0.9865 (0.9866) time: 0.1688 data: 0.0807 max mem: 9377 +Train: [1] [5500/6250] eta: 0:02:07 lr: 0.000047 grad: 0.1071 (0.0984) loss: 0.9842 (0.9866) time: 0.1651 data: 0.0748 max mem: 9377 +Train: [1] [5600/6250] eta: 0:01:50 lr: 0.000047 grad: 0.0883 (0.0984) loss: 0.9848 (0.9865) time: 0.1542 data: 0.0614 max mem: 9377 +Train: [1] [5700/6250] eta: 0:01:33 lr: 0.000048 grad: 0.1009 (0.0987) loss: 0.9830 (0.9865) time: 0.1562 data: 0.0705 max mem: 9377 +Train: [1] [5800/6250] eta: 0:01:16 lr: 0.000048 grad: 0.0959 (0.0988) loss: 0.9846 (0.9864) time: 0.1494 data: 0.0528 max mem: 9377 +Train: [1] [5900/6250] eta: 0:00:59 lr: 0.000049 grad: 0.1117 (0.0990) loss: 0.9836 (0.9864) time: 0.1402 data: 0.0524 max mem: 9377 +Train: [1] [6000/6250] eta: 0:00:42 lr: 0.000049 grad: 0.0864 (0.0992) loss: 0.9851 (0.9863) time: 0.1411 data: 0.0538 max mem: 9377 +Train: [1] [6100/6250] eta: 0:00:25 lr: 0.000049 grad: 0.1011 (0.0994) loss: 0.9841 (0.9863) time: 0.1776 data: 0.0935 max mem: 9377 +Train: [1] [6200/6250] eta: 0:00:08 lr: 0.000050 grad: 0.1036 (0.0995) loss: 0.9849 (0.9862) time: 0.1873 data: 0.0955 max mem: 9377 +Train: [1] [6249/6250] eta: 0:00:00 lr: 0.000050 grad: 0.0861 (0.0996) loss: 0.9845 (0.9862) time: 0.2283 data: 0.1337 max mem: 9377 +Train: [1] Total time: 0:17:46 (0.1707 s / it) +Averaged stats: lr: 0.000050 grad: 0.0861 (0.0996) loss: 0.9845 (0.9862) +Eval (hcp-train-subset): [1] [ 0/62] eta: 0:04:43 loss: 0.9901 (0.9901) time: 4.5796 data: 4.4990 max mem: 9377 +Eval (hcp-train-subset): [1] [61/62] eta: 0:00:00 loss: 0.9887 (0.9879) time: 0.1870 data: 0.1607 max mem: 9377 +Eval (hcp-train-subset): [1] Total time: 0:00:17 (0.2858 s / it) +Averaged stats (hcp-train-subset): loss: 0.9887 (0.9879) +Eval (hcp-val): [1] [ 0/62] eta: 0:05:00 loss: 0.9871 (0.9871) time: 4.8482 data: 4.7682 max mem: 9377 +Eval (hcp-val): [1] [61/62] eta: 0:00:00 loss: 0.9879 (0.9878) time: 0.1936 data: 0.1655 max mem: 9377 +Eval (hcp-val): [1] Total time: 0:00:16 (0.2694 s / it) +Averaged stats (hcp-val): loss: 0.9879 (0.9878) +Eval (nsd-val): [1] [ 0/62] eta: 0:06:24 loss: 0.9874 (0.9874) time: 6.2036 data: 6.1745 max mem: 9377 +Eval (nsd-val): [1] [61/62] eta: 0:00:00 loss: 0.9896 (0.9887) time: 0.1372 data: 0.1117 max mem: 9377 +Eval (nsd-val): [1] Total time: 0:00:17 (0.2853 s / it) +Averaged stats (nsd-val): loss: 0.9896 (0.9887) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-best.pth +Train: [2] [ 0/6250] eta: 10:36:19 lr: 0.000050 grad: 0.0511 (0.0511) loss: 0.9869 (0.9869) time: 6.1087 data: 5.9171 max mem: 9377 +Train: [2] [ 100/6250] eta: 0:26:12 lr: 0.000050 grad: 0.0835 (0.1008) loss: 0.9849 (0.9851) time: 0.2174 data: 0.1257 max mem: 9377 +Train: [2] [ 200/6250] eta: 0:22:21 lr: 0.000051 grad: 0.0979 (0.1047) loss: 0.9826 (0.9842) time: 0.2019 data: 0.0928 max mem: 9377 +Train: [2] [ 300/6250] eta: 0:20:52 lr: 0.000051 grad: 0.0993 (0.1026) loss: 0.9838 (0.9840) time: 0.1725 data: 0.0755 max mem: 9377 +Train: [2] [ 400/6250] eta: 0:20:19 lr: 0.000052 grad: 0.0810 (0.1002) loss: 0.9859 (0.9842) time: 0.1646 data: 0.0596 max mem: 9377 +Train: [2] [ 500/6250] eta: 0:19:59 lr: 0.000052 grad: 0.0894 (0.0992) loss: 0.9827 (0.9843) time: 0.2391 data: 0.1372 max mem: 9377 +Train: [2] [ 600/6250] eta: 0:19:39 lr: 0.000052 grad: 0.0805 (0.0987) loss: 0.9854 (0.9844) time: 0.2096 data: 0.0980 max mem: 9377 +Train: [2] [ 700/6250] eta: 0:19:11 lr: 0.000053 grad: 0.0894 (0.0994) loss: 0.9850 (0.9844) time: 0.1690 data: 0.0562 max mem: 9377 +Train: [2] [ 800/6250] eta: 0:18:34 lr: 0.000053 grad: 0.0852 (0.0993) loss: 0.9874 (0.9845) time: 0.1771 data: 0.0679 max mem: 9377 +Train: [2] [ 900/6250] eta: 0:17:53 lr: 0.000054 grad: 0.1020 (0.0999) loss: 0.9835 (0.9845) time: 0.1588 data: 0.0617 max mem: 9377 +Train: [2] [1000/6250] eta: 0:17:14 lr: 0.000054 grad: 0.1001 (0.1000) loss: 0.9830 (0.9844) time: 0.1797 data: 0.0787 max mem: 9377 +Train: [2] [1100/6250] eta: 0:16:43 lr: 0.000054 grad: 0.1096 (0.1008) loss: 0.9834 (0.9844) time: 0.1898 data: 0.1073 max mem: 9377 +Train: [2] [1200/6250] eta: 0:16:14 lr: 0.000055 grad: 0.0882 (0.1008) loss: 0.9845 (0.9844) time: 0.1705 data: 0.0847 max mem: 9377 +Train: [2] [1300/6250] eta: 0:15:46 lr: 0.000055 grad: 0.0985 (0.1006) loss: 0.9839 (0.9844) time: 0.1747 data: 0.0778 max mem: 9377 +Train: [2] [1400/6250] eta: 0:15:15 lr: 0.000056 grad: 0.0902 (0.1010) loss: 0.9861 (0.9844) time: 0.1620 data: 0.0738 max mem: 9377 +Train: [2] [1500/6250] eta: 0:14:47 lr: 0.000056 grad: 0.0870 (0.1013) loss: 0.9834 (0.9844) time: 0.1406 data: 0.0491 max mem: 9377 +Train: [2] [1600/6250] eta: 0:14:22 lr: 0.000056 grad: 0.1007 (0.1017) loss: 0.9840 (0.9843) time: 0.1843 data: 0.0968 max mem: 9377 +Train: [2] [1700/6250] eta: 0:13:58 lr: 0.000057 grad: 0.1100 (0.1020) loss: 0.9815 (0.9842) time: 0.1583 data: 0.0760 max mem: 9377 +Train: [2] [1800/6250] eta: 0:13:34 lr: 0.000057 grad: 0.1175 (0.1027) loss: 0.9828 (0.9841) time: 0.1695 data: 0.0787 max mem: 9377 +Train: [2] [1900/6250] eta: 0:13:09 lr: 0.000058 grad: 0.0928 (0.1029) loss: 0.9834 (0.9841) time: 0.1498 data: 0.0629 max mem: 9377 +Train: [2] [2000/6250] eta: 0:12:48 lr: 0.000058 grad: 0.0993 (0.1030) loss: 0.9829 (0.9840) time: 0.1717 data: 0.0879 max mem: 9377 +Train: [2] [2100/6250] eta: 0:12:26 lr: 0.000058 grad: 0.0958 (0.1031) loss: 0.9819 (0.9840) time: 0.1604 data: 0.0816 max mem: 9377 +Train: [2] [2200/6250] eta: 0:12:04 lr: 0.000059 grad: 0.0997 (0.1033) loss: 0.9831 (0.9840) time: 0.1519 data: 0.0628 max mem: 9377 +Train: [2] [2300/6250] eta: 0:11:44 lr: 0.000059 grad: 0.0960 (0.1034) loss: 0.9843 (0.9839) time: 0.1352 data: 0.0444 max mem: 9377 +Train: [2] [2400/6250] eta: 0:11:23 lr: 0.000060 grad: 0.0972 (0.1036) loss: 0.9827 (0.9839) time: 0.1679 data: 0.0833 max mem: 9377 +Train: [2] [2500/6250] eta: 0:11:03 lr: 0.000060 grad: 0.1033 (0.1039) loss: 0.9835 (0.9839) time: 0.1627 data: 0.0740 max mem: 9377 +Train: [2] [2600/6250] eta: 0:10:42 lr: 0.000060 grad: 0.1060 (0.1042) loss: 0.9860 (0.9839) time: 0.1460 data: 0.0597 max mem: 9377 +Train: [2] [2700/6250] eta: 0:10:22 lr: 0.000061 grad: 0.1084 (0.1042) loss: 0.9837 (0.9839) time: 0.1368 data: 0.0494 max mem: 9377 +Train: [2] [2800/6250] eta: 0:10:03 lr: 0.000061 grad: 0.0937 (0.1042) loss: 0.9831 (0.9839) time: 0.1721 data: 0.0831 max mem: 9377 +Train: [2] [2900/6250] eta: 0:09:43 lr: 0.000062 grad: 0.0944 (0.1043) loss: 0.9829 (0.9839) time: 0.1576 data: 0.0617 max mem: 9377 +Train: [2] [3000/6250] eta: 0:09:24 lr: 0.000062 grad: 0.1040 (0.1045) loss: 0.9838 (0.9839) time: 0.1612 data: 0.0781 max mem: 9377 +Train: [2] [3100/6250] eta: 0:09:05 lr: 0.000062 grad: 0.0898 (0.1046) loss: 0.9841 (0.9839) time: 0.1691 data: 0.0853 max mem: 9377 +Train: [2] [3200/6250] eta: 0:08:47 lr: 0.000063 grad: 0.1032 (0.1047) loss: 0.9826 (0.9839) time: 0.1403 data: 0.0522 max mem: 9377 +Train: [2] [3300/6250] eta: 0:08:29 lr: 0.000063 grad: 0.0960 (0.1050) loss: 0.9830 (0.9838) time: 0.1558 data: 0.0739 max mem: 9377 +Train: [2] [3400/6250] eta: 0:08:10 lr: 0.000064 grad: 0.1057 (0.1052) loss: 0.9832 (0.9838) time: 0.1551 data: 0.0677 max mem: 9377 +Train: [2] [3500/6250] eta: 0:07:52 lr: 0.000064 grad: 0.1167 (0.1057) loss: 0.9838 (0.9837) time: 0.1630 data: 0.0707 max mem: 9377 +Train: [2] [3600/6250] eta: 0:07:33 lr: 0.000064 grad: 0.0964 (0.1058) loss: 0.9807 (0.9837) time: 0.1234 data: 0.0308 max mem: 9377 +Train: [2] [3700/6250] eta: 0:07:16 lr: 0.000065 grad: 0.1239 (0.1063) loss: 0.9821 (0.9837) time: 0.1489 data: 0.0606 max mem: 9377 +Train: [2] [3800/6250] eta: 0:06:57 lr: 0.000065 grad: 0.0993 (0.1067) loss: 0.9829 (0.9836) time: 0.1422 data: 0.0496 max mem: 9377 +Train: [2] [3900/6250] eta: 0:06:40 lr: 0.000066 grad: 0.0870 (0.1072) loss: 0.9844 (0.9836) time: 0.1603 data: 0.0755 max mem: 9377 +Train: [2] [4000/6250] eta: 0:06:21 lr: 0.000066 grad: 0.1302 (0.1074) loss: 0.9831 (0.9836) time: 0.1622 data: 0.0840 max mem: 9377 +Train: [2] [4100/6250] eta: 0:06:04 lr: 0.000066 grad: 0.1129 (0.1078) loss: 0.9828 (0.9835) time: 0.1546 data: 0.0733 max mem: 9377 +Train: [2] [4200/6250] eta: 0:05:46 lr: 0.000067 grad: 0.1090 (0.1081) loss: 0.9833 (0.9835) time: 0.1669 data: 0.0820 max mem: 9377 +Train: [2] [4300/6250] eta: 0:05:29 lr: 0.000067 grad: 0.1219 (0.1089) loss: 0.9823 (0.9834) time: 0.1517 data: 0.0594 max mem: 9377 +Train: [2] [4400/6250] eta: 0:05:11 lr: 0.000068 grad: 0.1776 (0.1105) loss: 0.9822 (0.9834) time: 0.1364 data: 0.0485 max mem: 9377 +Train: [2] [4500/6250] eta: 0:04:54 lr: 0.000068 grad: 0.1745 (0.1119) loss: 0.9784 (0.9833) time: 0.2545 data: 0.1698 max mem: 9377 +Train: [2] [4600/6250] eta: 0:04:37 lr: 0.000068 grad: 0.1819 (0.1136) loss: 0.9822 (0.9832) time: 0.1484 data: 0.0612 max mem: 9377 +Train: [2] [4700/6250] eta: 0:04:19 lr: 0.000069 grad: 0.1423 (0.1149) loss: 0.9786 (0.9831) time: 0.1532 data: 0.0586 max mem: 9377 +Train: [2] [4800/6250] eta: 0:04:02 lr: 0.000069 grad: 0.1494 (0.1165) loss: 0.9804 (0.9830) time: 0.1411 data: 0.0521 max mem: 9377 +Train: [2] [4900/6250] eta: 0:03:45 lr: 0.000070 grad: 0.1282 (0.1174) loss: 0.9809 (0.9829) time: 0.1593 data: 0.0788 max mem: 9377 +Train: [2] [5000/6250] eta: 0:03:28 lr: 0.000070 grad: 0.1753 (0.1188) loss: 0.9799 (0.9828) time: 0.1591 data: 0.0688 max mem: 9377 +Train: [2] [5100/6250] eta: 0:03:12 lr: 0.000070 grad: 0.1850 (0.1203) loss: 0.9773 (0.9827) time: 0.1590 data: 0.0747 max mem: 9377 +Train: [2] [5200/6250] eta: 0:02:54 lr: 0.000071 grad: 0.1872 (0.1224) loss: 0.9776 (0.9826) time: 0.1515 data: 0.0540 max mem: 9377 +Train: [2] [5300/6250] eta: 0:02:38 lr: 0.000071 grad: 0.1739 (0.1241) loss: 0.9806 (0.9825) time: 0.1718 data: 0.0876 max mem: 9377 +Train: [2] [5400/6250] eta: 0:02:21 lr: 0.000072 grad: 0.1627 (0.1261) loss: 0.9792 (0.9824) time: 0.1664 data: 0.0789 max mem: 9377 +Train: [2] [5500/6250] eta: 0:02:04 lr: 0.000072 grad: 0.1799 (0.1278) loss: 0.9739 (0.9824) time: 0.2003 data: 0.1189 max mem: 9377 +Train: [2] [5600/6250] eta: 0:01:47 lr: 0.000072 grad: 0.1262 (0.1290) loss: 0.9785 (0.9823) time: 0.1995 data: 0.1136 max mem: 9377 +Train: [2] [5700/6250] eta: 0:01:31 lr: 0.000073 grad: 0.1873 (0.1305) loss: 0.9770 (0.9822) time: 0.1869 data: 0.0989 max mem: 9377 +Train: [2] [5800/6250] eta: 0:01:14 lr: 0.000073 grad: 0.1576 (0.1321) loss: 0.9808 (0.9821) time: 0.1522 data: 0.0593 max mem: 9377 +Train: [2] [5900/6250] eta: 0:00:57 lr: 0.000074 grad: 0.1577 (0.1337) loss: 0.9766 (0.9820) time: 0.1475 data: 0.0635 max mem: 9377 +Train: [2] [6000/6250] eta: 0:00:41 lr: 0.000074 grad: 0.1936 (0.1357) loss: 0.9777 (0.9820) time: 0.1337 data: 0.0420 max mem: 9377 +Train: [2] [6100/6250] eta: 0:00:24 lr: 0.000074 grad: 0.1879 (0.1368) loss: 0.9756 (0.9819) time: 0.1701 data: 0.0805 max mem: 9377 +Train: [2] [6200/6250] eta: 0:00:08 lr: 0.000075 grad: 0.1984 (0.1381) loss: 0.9764 (0.9818) time: 0.1781 data: 0.0913 max mem: 9377 +Train: [2] [6249/6250] eta: 0:00:00 lr: 0.000075 grad: 0.1649 (0.1386) loss: 0.9770 (0.9817) time: 0.1660 data: 0.0829 max mem: 9377 +Train: [2] Total time: 0:17:21 (0.1666 s / it) +Averaged stats: lr: 0.000075 grad: 0.1649 (0.1386) loss: 0.9770 (0.9817) +Eval (hcp-train-subset): [2] [ 0/62] eta: 0:05:49 loss: 0.9848 (0.9848) time: 5.6370 data: 5.6029 max mem: 9377 +Eval (hcp-train-subset): [2] [61/62] eta: 0:00:00 loss: 0.9814 (0.9826) time: 0.1532 data: 0.1279 max mem: 9377 +Eval (hcp-train-subset): [2] Total time: 0:00:15 (0.2467 s / it) +Averaged stats (hcp-train-subset): loss: 0.9814 (0.9826) +Eval (hcp-val): [2] [ 0/62] eta: 0:05:24 loss: 0.9784 (0.9784) time: 5.2347 data: 5.2054 max mem: 9377 +Eval (hcp-val): [2] [61/62] eta: 0:00:00 loss: 0.9808 (0.9823) time: 0.1474 data: 0.1214 max mem: 9377 +Eval (hcp-val): [2] Total time: 0:00:16 (0.2605 s / it) +Averaged stats (hcp-val): loss: 0.9808 (0.9823) +Eval (nsd-val): [2] [ 0/62] eta: 0:03:45 loss: 0.9769 (0.9769) time: 3.6353 data: 3.5555 max mem: 9377 +Eval (nsd-val): [2] [61/62] eta: 0:00:00 loss: 0.9825 (0.9822) time: 0.1197 data: 0.0942 max mem: 9377 +Eval (nsd-val): [2] Total time: 0:00:16 (0.2607 s / it) +Averaged stats (nsd-val): loss: 0.9825 (0.9822) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-best.pth +Train: [3] [ 0/6250] eta: 11:03:59 lr: 0.000075 grad: 0.1909 (0.1909) loss: 0.9767 (0.9767) time: 6.3743 data: 6.2325 max mem: 9377 +Train: [3] [ 100/6250] eta: 0:25:44 lr: 0.000075 grad: 0.1686 (0.2852) loss: 0.9767 (0.9768) time: 0.1980 data: 0.1095 max mem: 9377 +Train: [3] [ 200/6250] eta: 0:21:09 lr: 0.000076 grad: 0.1938 (0.2794) loss: 0.9736 (0.9749) time: 0.1836 data: 0.1071 max mem: 9377 +Train: [3] [ 300/6250] eta: 0:19:45 lr: 0.000076 grad: 0.2544 (0.2625) loss: 0.9770 (0.9743) time: 0.2128 data: 0.1147 max mem: 9377 +Train: [3] [ 400/6250] eta: 0:18:33 lr: 0.000077 grad: 0.1672 (0.2509) loss: 0.9767 (0.9748) time: 0.1651 data: 0.0545 max mem: 9377 +Train: [3] [ 500/6250] eta: 0:18:01 lr: 0.000077 grad: 0.2261 (0.2424) loss: 0.9761 (0.9752) time: 0.1840 data: 0.0923 max mem: 9377 +Train: [3] [ 600/6250] eta: 0:17:25 lr: 0.000077 grad: 0.1850 (0.2396) loss: 0.9746 (0.9752) time: 0.1760 data: 0.0728 max mem: 9377 +Train: [3] [ 700/6250] eta: 0:16:59 lr: 0.000078 grad: 0.2007 (0.2366) loss: 0.9779 (0.9754) time: 0.1564 data: 0.0460 max mem: 9377 +Train: [3] [ 800/6250] eta: 0:16:49 lr: 0.000078 grad: 0.1687 (0.2390) loss: 0.9768 (0.9753) time: 0.1987 data: 0.0665 max mem: 9377 +Train: [3] [ 900/6250] eta: 0:16:28 lr: 0.000079 grad: 0.1677 (0.2407) loss: 0.9739 (0.9752) time: 0.2041 data: 0.0983 max mem: 9377 +Train: [3] [1000/6250] eta: 0:16:07 lr: 0.000079 grad: 0.1868 (0.2388) loss: 0.9731 (0.9751) time: 0.1930 data: 0.1040 max mem: 9377 +Train: [3] [1100/6250] eta: 0:15:38 lr: 0.000079 grad: 0.1426 (0.2363) loss: 0.9777 (0.9751) time: 0.1689 data: 0.0832 max mem: 9377 +Train: [3] [1200/6250] eta: 0:15:13 lr: 0.000080 grad: 0.2397 (0.2375) loss: 0.9744 (0.9751) time: 0.1450 data: 0.0515 max mem: 9377 +Train: [3] [1300/6250] eta: 0:14:47 lr: 0.000080 grad: 0.1548 (0.2354) loss: 0.9756 (0.9751) time: 0.1528 data: 0.0673 max mem: 9377 +Train: [3] [1400/6250] eta: 0:14:23 lr: 0.000081 grad: 0.2145 (0.2366) loss: 0.9782 (0.9751) time: 0.1445 data: 0.0595 max mem: 9377 +Train: [3] [1500/6250] eta: 0:14:01 lr: 0.000081 grad: 0.1862 (0.2350) loss: 0.9743 (0.9751) time: 0.1705 data: 0.0802 max mem: 9377 +Train: [3] [1600/6250] eta: 0:13:36 lr: 0.000081 grad: 0.2386 (0.2355) loss: 0.9782 (0.9751) time: 0.1677 data: 0.0756 max mem: 9377 +Train: [3] [1700/6250] eta: 0:13:13 lr: 0.000082 grad: 0.1485 (0.2346) loss: 0.9743 (0.9751) time: 0.1581 data: 0.0751 max mem: 9377 +Train: [3] [1800/6250] eta: 0:12:51 lr: 0.000082 grad: 0.1586 (0.2341) loss: 0.9733 (0.9750) time: 0.1603 data: 0.0671 max mem: 9377 +Train: [3] [1900/6250] eta: 0:12:27 lr: 0.000083 grad: 0.2183 (0.2335) loss: 0.9726 (0.9748) time: 0.1268 data: 0.0418 max mem: 9377 +Train: [3] [2000/6250] eta: 0:12:07 lr: 0.000083 grad: 0.2049 (0.2345) loss: 0.9750 (0.9748) time: 0.1546 data: 0.0639 max mem: 9377 +Train: [3] [2100/6250] eta: 0:11:48 lr: 0.000083 grad: 0.1895 (0.2327) loss: 0.9724 (0.9747) time: 0.1431 data: 0.0614 max mem: 9377 +Train: [3] [2200/6250] eta: 0:11:30 lr: 0.000084 grad: 0.1971 (0.2327) loss: 0.9750 (0.9747) time: 0.1614 data: 0.0671 max mem: 9377 +Train: [3] [2300/6250] eta: 0:11:11 lr: 0.000084 grad: 0.1385 (0.2322) loss: 0.9751 (0.9747) time: 0.1738 data: 0.0827 max mem: 9377 +Train: [3] [2400/6250] eta: 0:10:51 lr: 0.000085 grad: 0.1627 (0.2319) loss: 0.9733 (0.9746) time: 0.1471 data: 0.0496 max mem: 9377 +Train: [3] [2500/6250] eta: 0:10:32 lr: 0.000085 grad: 0.1715 (0.2325) loss: 0.9713 (0.9746) time: 0.1691 data: 0.0872 max mem: 9377 +Train: [3] [2600/6250] eta: 0:10:14 lr: 0.000085 grad: 0.1994 (0.2326) loss: 0.9734 (0.9745) time: 0.1575 data: 0.0726 max mem: 9377 +Train: [3] [2700/6250] eta: 0:09:55 lr: 0.000086 grad: 0.1472 (0.2327) loss: 0.9752 (0.9744) time: 0.1573 data: 0.0660 max mem: 9377 +Train: [3] [2800/6250] eta: 0:09:38 lr: 0.000086 grad: 0.2310 (0.2331) loss: 0.9728 (0.9744) time: 0.1697 data: 0.0903 max mem: 9377 +Train: [3] [2900/6250] eta: 0:09:20 lr: 0.000087 grad: 0.2194 (0.2319) loss: 0.9775 (0.9743) time: 0.1374 data: 0.0522 max mem: 9377 +Train: [3] [3000/6250] eta: 0:09:02 lr: 0.000087 grad: 0.2168 (0.2314) loss: 0.9757 (0.9743) time: 0.1627 data: 0.0784 max mem: 9377 +Train: [3] [3100/6250] eta: 0:08:44 lr: 0.000087 grad: 0.1803 (0.2310) loss: 0.9745 (0.9743) time: 0.1544 data: 0.0648 max mem: 9377 +Train: [3] [3200/6250] eta: 0:08:26 lr: 0.000088 grad: 0.2178 (0.2308) loss: 0.9755 (0.9742) time: 0.1306 data: 0.0362 max mem: 9377 +Train: [3] [3300/6250] eta: 0:08:09 lr: 0.000088 grad: 0.1467 (0.2296) loss: 0.9730 (0.9742) time: 0.1507 data: 0.0671 max mem: 9377 +Train: [3] [3400/6250] eta: 0:07:51 lr: 0.000089 grad: 0.2296 (0.2294) loss: 0.9755 (0.9742) time: 0.1467 data: 0.0580 max mem: 9377 +Train: [3] [3500/6250] eta: 0:07:34 lr: 0.000089 grad: 0.1598 (0.2282) loss: 0.9729 (0.9741) time: 0.1280 data: 0.0395 max mem: 9377 +Train: [3] [3600/6250] eta: 0:07:17 lr: 0.000089 grad: 0.1830 (0.2271) loss: 0.9709 (0.9741) time: 0.1573 data: 0.0719 max mem: 9377 +Train: [3] [3700/6250] eta: 0:07:01 lr: 0.000090 grad: 0.1461 (0.2268) loss: 0.9741 (0.9740) time: 0.1687 data: 0.0771 max mem: 9377 +Train: [3] [3800/6250] eta: 0:06:44 lr: 0.000090 grad: 0.1587 (0.2259) loss: 0.9752 (0.9740) time: 0.1523 data: 0.0645 max mem: 9377 +Train: [3] [3900/6250] eta: 0:06:27 lr: 0.000091 grad: 0.1917 (0.2255) loss: 0.9787 (0.9741) time: 0.1611 data: 0.0749 max mem: 9377 +Train: [3] [4000/6250] eta: 0:06:10 lr: 0.000091 grad: 0.1499 (0.2253) loss: 0.9697 (0.9741) time: 0.1278 data: 0.0451 max mem: 9377 +Train: [3] [4100/6250] eta: 0:05:53 lr: 0.000091 grad: 0.2284 (0.2251) loss: 0.9750 (0.9740) time: 0.1469 data: 0.0563 max mem: 9377 +Train: [3] [4200/6250] eta: 0:05:37 lr: 0.000092 grad: 0.1521 (0.2238) loss: 0.9723 (0.9740) time: 0.1468 data: 0.0697 max mem: 9377 +Train: [3] [4300/6250] eta: 0:05:20 lr: 0.000092 grad: 0.1947 (0.2237) loss: 0.9729 (0.9740) time: 0.1469 data: 0.0613 max mem: 9377 +Train: [3] [4400/6250] eta: 0:05:03 lr: 0.000093 grad: 0.2377 (0.2228) loss: 0.9728 (0.9739) time: 0.1442 data: 0.0549 max mem: 9377 +Train: [3] [4500/6250] eta: 0:04:47 lr: 0.000093 grad: 0.1541 (0.2223) loss: 0.9712 (0.9739) time: 0.1716 data: 0.0773 max mem: 9377 +Train: [3] [4600/6250] eta: 0:04:31 lr: 0.000093 grad: 0.1625 (0.2222) loss: 0.9743 (0.9739) time: 0.1086 data: 0.0201 max mem: 9377 +Train: [3] [4700/6250] eta: 0:04:14 lr: 0.000094 grad: 0.1809 (0.2215) loss: 0.9733 (0.9739) time: 0.1291 data: 0.0443 max mem: 9377 +Train: [3] [4800/6250] eta: 0:03:57 lr: 0.000094 grad: 0.2134 (0.2213) loss: 0.9719 (0.9738) time: 0.1746 data: 0.0946 max mem: 9377 +Train: [3] [4900/6250] eta: 0:03:41 lr: 0.000095 grad: 0.2656 (0.2213) loss: 0.9700 (0.9737) time: 0.1452 data: 0.0561 max mem: 9377 +Train: [3] [5000/6250] eta: 0:03:24 lr: 0.000095 grad: 0.1676 (0.2209) loss: 0.9695 (0.9737) time: 0.1641 data: 0.0817 max mem: 9377 +Train: [3] [5100/6250] eta: 0:03:08 lr: 0.000095 grad: 0.1816 (0.2205) loss: 0.9727 (0.9736) time: 0.1905 data: 0.1085 max mem: 9377 +Train: [3] [5200/6250] eta: 0:02:52 lr: 0.000096 grad: 0.2015 (0.2204) loss: 0.9729 (0.9736) time: 0.1922 data: 0.1131 max mem: 9377 +Train: [3] [5300/6250] eta: 0:02:35 lr: 0.000096 grad: 0.2054 (0.2201) loss: 0.9735 (0.9735) time: 0.1516 data: 0.0658 max mem: 9377 +Train: [3] [5400/6250] eta: 0:02:19 lr: 0.000097 grad: 0.1532 (0.2196) loss: 0.9705 (0.9735) time: 0.1541 data: 0.0648 max mem: 9377 +Train: [3] [5500/6250] eta: 0:02:02 lr: 0.000097 grad: 0.2388 (0.2192) loss: 0.9714 (0.9734) time: 0.1718 data: 0.0857 max mem: 9377 +Train: [3] [5600/6250] eta: 0:01:46 lr: 0.000097 grad: 0.1755 (0.2187) loss: 0.9733 (0.9734) time: 0.1597 data: 0.0666 max mem: 9377 +Train: [3] [5700/6250] eta: 0:01:29 lr: 0.000098 grad: 0.1456 (0.2186) loss: 0.9698 (0.9733) time: 0.1774 data: 0.0907 max mem: 9377 +Train: [3] [5800/6250] eta: 0:01:13 lr: 0.000098 grad: 0.1409 (0.2182) loss: 0.9694 (0.9732) time: 0.1716 data: 0.0836 max mem: 9377 +Train: [3] [5900/6250] eta: 0:00:57 lr: 0.000099 grad: 0.1461 (0.2179) loss: 0.9671 (0.9732) time: 0.2065 data: 0.1321 max mem: 9377 +Train: [3] [6000/6250] eta: 0:00:40 lr: 0.000099 grad: 0.1734 (0.2174) loss: 0.9706 (0.9731) time: 0.1485 data: 0.0689 max mem: 9377 +Train: [3] [6100/6250] eta: 0:00:24 lr: 0.000099 grad: 0.2011 (0.2176) loss: 0.9736 (0.9731) time: 0.1696 data: 0.0826 max mem: 9377 +Train: [3] [6200/6250] eta: 0:00:08 lr: 0.000100 grad: 0.1758 (0.2174) loss: 0.9698 (0.9731) time: 0.2278 data: 0.1476 max mem: 9377 +Train: [3] [6249/6250] eta: 0:00:00 lr: 0.000100 grad: 0.1793 (0.2171) loss: 0.9715 (0.9731) time: 0.2097 data: 0.1302 max mem: 9377 +Train: [3] Total time: 0:17:10 (0.1649 s / it) +Averaged stats: lr: 0.000100 grad: 0.1793 (0.2171) loss: 0.9715 (0.9731) +Eval (hcp-train-subset): [3] [ 0/62] eta: 0:05:08 loss: 0.9820 (0.9820) time: 4.9711 data: 4.9398 max mem: 9377 +Eval (hcp-train-subset): [3] [61/62] eta: 0:00:00 loss: 0.9757 (0.9760) time: 0.1614 data: 0.1358 max mem: 9377 +Eval (hcp-train-subset): [3] Total time: 0:00:15 (0.2535 s / it) +Averaged stats (hcp-train-subset): loss: 0.9757 (0.9760) +Eval (hcp-val): [3] [ 0/62] eta: 0:05:14 loss: 0.9686 (0.9686) time: 5.0658 data: 5.0362 max mem: 9377 +Eval (hcp-val): [3] [61/62] eta: 0:00:00 loss: 0.9762 (0.9756) time: 0.1558 data: 0.1285 max mem: 9377 +Eval (hcp-val): [3] Total time: 0:00:15 (0.2451 s / it) +Averaged stats (hcp-val): loss: 0.9762 (0.9756) +Eval (nsd-val): [3] [ 0/62] eta: 0:05:49 loss: 0.9712 (0.9712) time: 5.6324 data: 5.6027 max mem: 9377 +Eval (nsd-val): [3] [61/62] eta: 0:00:00 loss: 0.9766 (0.9750) time: 0.1316 data: 0.1054 max mem: 9377 +Eval (nsd-val): [3] Total time: 0:00:14 (0.2289 s / it) +Averaged stats (nsd-val): loss: 0.9766 (0.9750) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-best.pth +Train: [4] [ 0/6250] eta: 11:30:09 lr: 0.000100 grad: 0.3113 (0.3113) loss: 0.9576 (0.9576) time: 6.6255 data: 6.5172 max mem: 9377 +Train: [4] [ 100/6250] eta: 0:24:05 lr: 0.000100 grad: 0.1646 (0.2105) loss: 0.9696 (0.9709) time: 0.1627 data: 0.0777 max mem: 9377 +Train: [4] [ 200/6250] eta: 0:22:01 lr: 0.000101 grad: 0.1876 (0.2060) loss: 0.9704 (0.9688) time: 0.1618 data: 0.0724 max mem: 9377 +Train: [4] [ 300/6250] eta: 0:20:27 lr: 0.000101 grad: 0.1449 (0.1971) loss: 0.9704 (0.9687) time: 0.1781 data: 0.0925 max mem: 9377 +Train: [4] [ 400/6250] eta: 0:19:17 lr: 0.000102 grad: 0.2713 (0.2066) loss: 0.9687 (0.9686) time: 0.1825 data: 0.0981 max mem: 9377 +Train: [4] [ 500/6250] eta: 0:18:45 lr: 0.000102 grad: 0.1414 (0.2056) loss: 0.9660 (0.9686) time: 0.2006 data: 0.0989 max mem: 9377 +Train: [4] [ 600/6250] eta: 0:18:28 lr: 0.000102 grad: 0.1910 (0.2023) loss: 0.9669 (0.9684) time: 0.2163 data: 0.1077 max mem: 9377 +Train: [4] [ 700/6250] eta: 0:18:03 lr: 0.000103 grad: 0.1802 (0.2009) loss: 0.9668 (0.9680) time: 0.1853 data: 0.0749 max mem: 9377 +Train: [4] [ 800/6250] eta: 0:17:39 lr: 0.000103 grad: 0.2583 (0.2022) loss: 0.9684 (0.9678) time: 0.1965 data: 0.0865 max mem: 9377 +Train: [4] [ 900/6250] eta: 0:17:07 lr: 0.000104 grad: 0.1811 (0.2018) loss: 0.9643 (0.9677) time: 0.1652 data: 0.0639 max mem: 9377 +Train: [4] [1000/6250] eta: 0:16:39 lr: 0.000104 grad: 0.1845 (0.2005) loss: 0.9670 (0.9676) time: 0.1733 data: 0.0707 max mem: 9377 +Train: [4] [1100/6250] eta: 0:16:08 lr: 0.000104 grad: 0.1935 (0.2013) loss: 0.9672 (0.9674) time: 0.1678 data: 0.0718 max mem: 9377 +Train: [4] [1200/6250] eta: 0:15:39 lr: 0.000105 grad: 0.1358 (0.2000) loss: 0.9653 (0.9673) time: 0.1650 data: 0.0759 max mem: 9377 +Train: [4] [1300/6250] eta: 0:15:11 lr: 0.000105 grad: 0.2106 (0.1999) loss: 0.9674 (0.9673) time: 0.1373 data: 0.0426 max mem: 9377 +Train: [4] [1400/6250] eta: 0:14:45 lr: 0.000106 grad: 0.1991 (0.2004) loss: 0.9658 (0.9672) time: 0.1470 data: 0.0531 max mem: 9377 +Train: [4] [1500/6250] eta: 0:14:17 lr: 0.000106 grad: 0.1655 (0.2002) loss: 0.9598 (0.9670) time: 0.1515 data: 0.0686 max mem: 9377 +Train: [4] [1600/6250] eta: 0:13:52 lr: 0.000106 grad: 0.2047 (0.2016) loss: 0.9626 (0.9668) time: 0.1702 data: 0.0860 max mem: 9377 +Train: [4] [1700/6250] eta: 0:13:30 lr: 0.000107 grad: 0.2039 (0.2010) loss: 0.9633 (0.9666) time: 0.1684 data: 0.0828 max mem: 9377 +Train: [4] [1800/6250] eta: 0:13:05 lr: 0.000107 grad: 0.1534 (0.2006) loss: 0.9654 (0.9664) time: 0.1439 data: 0.0513 max mem: 9377 +Train: [4] [1900/6250] eta: 0:12:42 lr: 0.000108 grad: 0.2266 (0.2026) loss: 0.9628 (0.9662) time: 0.1506 data: 0.0646 max mem: 9377 +Train: [4] [2000/6250] eta: 0:12:22 lr: 0.000108 grad: 0.1722 (0.2023) loss: 0.9619 (0.9660) time: 0.1641 data: 0.0739 max mem: 9377 +Train: [4] [2100/6250] eta: 0:12:00 lr: 0.000108 grad: 0.1541 (0.2023) loss: 0.9590 (0.9658) time: 0.1568 data: 0.0627 max mem: 9377 +Train: [4] [2200/6250] eta: 0:11:39 lr: 0.000109 grad: 0.1997 (0.2024) loss: 0.9620 (0.9657) time: 0.1594 data: 0.0686 max mem: 9377 +Train: [4] [2300/6250] eta: 0:11:20 lr: 0.000109 grad: 0.1713 (0.2027) loss: 0.9613 (0.9655) time: 0.1651 data: 0.0785 max mem: 9377 +Train: [4] [2400/6250] eta: 0:11:01 lr: 0.000110 grad: 0.2453 (0.2039) loss: 0.9622 (0.9653) time: 0.1571 data: 0.0648 max mem: 9377 +Train: [4] [2500/6250] eta: 0:10:41 lr: 0.000110 grad: 0.1759 (0.2036) loss: 0.9589 (0.9652) time: 0.1533 data: 0.0660 max mem: 9377 +Train: [4] [2600/6250] eta: 0:10:23 lr: 0.000110 grad: 0.2362 (0.2039) loss: 0.9635 (0.9649) time: 0.1756 data: 0.0909 max mem: 9377 +Train: [4] [2700/6250] eta: 0:10:04 lr: 0.000111 grad: 0.1930 (0.2042) loss: 0.9551 (0.9647) time: 0.1426 data: 0.0494 max mem: 9377 +Train: [4] [2800/6250] eta: 0:09:46 lr: 0.000111 grad: 0.2240 (0.2051) loss: 0.9593 (0.9644) time: 0.1715 data: 0.0818 max mem: 9377 +Train: [4] [2900/6250] eta: 0:09:28 lr: 0.000112 grad: 0.1709 (0.2052) loss: 0.9574 (0.9642) time: 0.1375 data: 0.0502 max mem: 9377 +Train: [4] [3000/6250] eta: 0:09:10 lr: 0.000112 grad: 0.1945 (0.2061) loss: 0.9547 (0.9639) time: 0.1362 data: 0.0431 max mem: 9377 +Train: [4] [3100/6250] eta: 0:08:52 lr: 0.000112 grad: 0.2144 (0.2068) loss: 0.9559 (0.9636) time: 0.1436 data: 0.0506 max mem: 9377 +Train: [4] [3200/6250] eta: 0:08:34 lr: 0.000113 grad: 0.1825 (0.2081) loss: 0.9552 (0.9633) time: 0.1745 data: 0.0879 max mem: 9377 +Train: [4] [3300/6250] eta: 0:08:15 lr: 0.000113 grad: 0.1979 (0.2086) loss: 0.9525 (0.9630) time: 0.1522 data: 0.0634 max mem: 9377 +Train: [4] [3400/6250] eta: 0:07:58 lr: 0.000114 grad: 0.2345 (0.2097) loss: 0.9510 (0.9627) time: 0.1800 data: 0.0884 max mem: 9377 +Train: [4] [3500/6250] eta: 0:07:40 lr: 0.000114 grad: 0.2863 (0.2113) loss: 0.9484 (0.9623) time: 0.1556 data: 0.0699 max mem: 9377 +Train: [4] [3600/6250] eta: 0:07:23 lr: 0.000114 grad: 0.2343 (0.2124) loss: 0.9485 (0.9619) time: 0.1641 data: 0.0659 max mem: 9377 +Train: [4] [3700/6250] eta: 0:07:05 lr: 0.000115 grad: 0.2908 (0.2144) loss: 0.9449 (0.9615) time: 0.1510 data: 0.0608 max mem: 9377 +Train: [4] [3800/6250] eta: 0:06:47 lr: 0.000115 grad: 0.2447 (0.2160) loss: 0.9470 (0.9611) time: 0.1479 data: 0.0593 max mem: 9377 +Train: [4] [3900/6250] eta: 0:06:30 lr: 0.000116 grad: 0.2742 (0.2172) loss: 0.9403 (0.9606) time: 0.1654 data: 0.0830 max mem: 9377 +Train: [4] [4000/6250] eta: 0:06:13 lr: 0.000116 grad: 0.2220 (0.2189) loss: 0.9406 (0.9601) time: 0.1665 data: 0.0834 max mem: 9377 +Train: [4] [4100/6250] eta: 0:05:56 lr: 0.000116 grad: 0.3193 (0.2207) loss: 0.9443 (0.9596) time: 0.1587 data: 0.0692 max mem: 9377 +Train: [4] [4200/6250] eta: 0:05:40 lr: 0.000117 grad: 0.2789 (0.2223) loss: 0.9412 (0.9592) time: 0.1644 data: 0.0845 max mem: 9377 +Train: [4] [4300/6250] eta: 0:05:23 lr: 0.000117 grad: 0.1924 (0.2229) loss: 0.9337 (0.9587) time: 0.1621 data: 0.0766 max mem: 9377 +Train: [4] [4400/6250] eta: 0:05:06 lr: 0.000118 grad: 0.2889 (0.2237) loss: 0.9363 (0.9582) time: 0.1364 data: 0.0441 max mem: 9377 +Train: [4] [4500/6250] eta: 0:04:49 lr: 0.000118 grad: 0.3475 (0.2253) loss: 0.9338 (0.9576) time: 0.1635 data: 0.0764 max mem: 9377 +Train: [4] [4600/6250] eta: 0:04:33 lr: 0.000118 grad: 0.2837 (0.2262) loss: 0.9316 (0.9571) time: 0.1512 data: 0.0623 max mem: 9377 +Train: [4] [4700/6250] eta: 0:04:16 lr: 0.000119 grad: 0.2096 (0.2268) loss: 0.9317 (0.9566) time: 0.1540 data: 0.0672 max mem: 9377 +Train: [4] [4800/6250] eta: 0:04:00 lr: 0.000119 grad: 0.2464 (0.2280) loss: 0.9345 (0.9561) time: 0.2266 data: 0.1443 max mem: 9377 +Train: [4] [4900/6250] eta: 0:03:43 lr: 0.000120 grad: 0.2598 (0.2285) loss: 0.9295 (0.9556) time: 0.1741 data: 0.0907 max mem: 9377 +Train: [4] [5000/6250] eta: 0:03:26 lr: 0.000120 grad: 0.2231 (0.2292) loss: 0.9355 (0.9551) time: 0.1472 data: 0.0551 max mem: 9377 +Train: [4] [5100/6250] eta: 0:03:10 lr: 0.000120 grad: 0.2553 (0.2299) loss: 0.9380 (0.9547) time: 0.1462 data: 0.0612 max mem: 9377 +Train: [4] [5200/6250] eta: 0:02:53 lr: 0.000121 grad: 0.2340 (0.2299) loss: 0.9366 (0.9542) time: 0.1547 data: 0.0668 max mem: 9377 +Train: [4] [5300/6250] eta: 0:02:37 lr: 0.000121 grad: 0.2311 (0.2304) loss: 0.9282 (0.9538) time: 0.1505 data: 0.0598 max mem: 9377 +Train: [4] [5400/6250] eta: 0:02:20 lr: 0.000122 grad: 0.2075 (0.2307) loss: 0.9312 (0.9534) time: 0.1347 data: 0.0507 max mem: 9377 +Train: [4] [5500/6250] eta: 0:02:03 lr: 0.000122 grad: 0.1802 (0.2304) loss: 0.9322 (0.9530) time: 0.1586 data: 0.0596 max mem: 9377 +Train: [4] [5600/6250] eta: 0:01:46 lr: 0.000122 grad: 0.2830 (0.2305) loss: 0.9299 (0.9525) time: 0.1657 data: 0.0787 max mem: 9377 +Train: [4] [5700/6250] eta: 0:01:30 lr: 0.000123 grad: 0.2381 (0.2305) loss: 0.9320 (0.9521) time: 0.1455 data: 0.0660 max mem: 9377 +Train: [4] [5800/6250] eta: 0:01:14 lr: 0.000123 grad: 0.1790 (0.2307) loss: 0.9279 (0.9517) time: 0.1881 data: 0.0970 max mem: 9377 +Train: [4] [5900/6250] eta: 0:00:57 lr: 0.000124 grad: 0.2487 (0.2308) loss: 0.9329 (0.9513) time: 0.1765 data: 0.0810 max mem: 9377 +Train: [4] [6000/6250] eta: 0:00:41 lr: 0.000124 grad: 0.2310 (0.2308) loss: 0.9265 (0.9509) time: 0.1690 data: 0.0789 max mem: 9377 +Train: [4] [6100/6250] eta: 0:00:24 lr: 0.000124 grad: 0.2015 (0.2309) loss: 0.9222 (0.9505) time: 0.1754 data: 0.0852 max mem: 9377 +Train: [4] [6200/6250] eta: 0:00:08 lr: 0.000125 grad: 0.1959 (0.2307) loss: 0.9219 (0.9501) time: 0.2166 data: 0.1326 max mem: 9377 +Train: [4] [6249/6250] eta: 0:00:00 lr: 0.000125 grad: 0.1935 (0.2306) loss: 0.9211 (0.9499) time: 0.1832 data: 0.0915 max mem: 9377 +Train: [4] Total time: 0:17:17 (0.1660 s / it) +Averaged stats: lr: 0.000125 grad: 0.1935 (0.2306) loss: 0.9211 (0.9499) +Eval (hcp-train-subset): [4] [ 0/62] eta: 0:03:44 loss: 0.9308 (0.9308) time: 3.6217 data: 3.5482 max mem: 9377 +Eval (hcp-train-subset): [4] [61/62] eta: 0:00:00 loss: 0.9333 (0.9324) time: 0.1508 data: 0.1257 max mem: 9377 +Eval (hcp-train-subset): [4] Total time: 0:00:14 (0.2379 s / it) +Averaged stats (hcp-train-subset): loss: 0.9333 (0.9324) +Making plots (hcp-train-subset): example=27 +Eval (hcp-val): [4] [ 0/62] eta: 0:04:30 loss: 0.9304 (0.9304) time: 4.3596 data: 4.2501 max mem: 9377 +Eval (hcp-val): [4] [61/62] eta: 0:00:00 loss: 0.9331 (0.9314) time: 0.1513 data: 0.1262 max mem: 9377 +Eval (hcp-val): [4] Total time: 0:00:14 (0.2387 s / it) +Averaged stats (hcp-val): loss: 0.9331 (0.9314) +Making plots (hcp-val): example=1 +Eval (nsd-val): [4] [ 0/62] eta: 0:06:01 loss: 0.9005 (0.9005) time: 5.8303 data: 5.7992 max mem: 9377 +Eval (nsd-val): [4] [61/62] eta: 0:00:00 loss: 0.9072 (0.9086) time: 0.1141 data: 0.0892 max mem: 9377 +Eval (nsd-val): [4] Total time: 0:00:14 (0.2274 s / it) +Averaged stats (nsd-val): loss: 0.9072 (0.9086) +Making plots (nsd-val): example=28 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-best.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-00004.pth +Train: [5] [ 0/6250] eta: 10:08:39 lr: 0.000125 grad: 0.1065 (0.1065) loss: 0.9670 (0.9670) time: 5.8431 data: 5.6912 max mem: 9377 +Train: [5] [ 100/6250] eta: 0:23:03 lr: 0.000125 grad: 0.1925 (0.2274) loss: 0.9315 (0.9371) time: 0.1564 data: 0.0646 max mem: 9377 +Train: [5] [ 200/6250] eta: 0:20:57 lr: 0.000125 grad: 0.2196 (0.2300) loss: 0.9234 (0.9328) time: 0.1594 data: 0.0679 max mem: 9377 +Train: [5] [ 300/6250] eta: 0:19:41 lr: 0.000125 grad: 0.1914 (0.2287) loss: 0.9196 (0.9299) time: 0.1474 data: 0.0596 max mem: 9377 +Train: [5] [ 400/6250] eta: 0:18:18 lr: 0.000125 grad: 0.1905 (0.2256) loss: 0.9236 (0.9279) time: 0.1555 data: 0.0659 max mem: 9377 +Train: [5] [ 500/6250] eta: 0:17:34 lr: 0.000125 grad: 0.2135 (0.2271) loss: 0.9208 (0.9264) time: 0.1469 data: 0.0607 max mem: 9377 +Train: [5] [ 600/6250] eta: 0:17:01 lr: 0.000125 grad: 0.1962 (0.2261) loss: 0.9237 (0.9253) time: 0.1762 data: 0.0760 max mem: 9377 +Train: [5] [ 700/6250] eta: 0:16:45 lr: 0.000125 grad: 0.1731 (0.2254) loss: 0.9267 (0.9247) time: 0.1515 data: 0.0510 max mem: 9377 +Train: [5] [ 800/6250] eta: 0:16:19 lr: 0.000125 grad: 0.2145 (0.2237) loss: 0.9206 (0.9242) time: 0.1669 data: 0.0632 max mem: 9377 +Train: [5] [ 900/6250] eta: 0:15:50 lr: 0.000125 grad: 0.1733 (0.2231) loss: 0.9232 (0.9237) time: 0.1755 data: 0.0599 max mem: 9377 +Train: [5] [1000/6250] eta: 0:15:27 lr: 0.000125 grad: 0.2088 (0.2230) loss: 0.9232 (0.9232) time: 0.1761 data: 0.0797 max mem: 9377 +Train: [5] [1100/6250] eta: 0:15:04 lr: 0.000125 grad: 0.1965 (0.2212) loss: 0.9170 (0.9228) time: 0.1285 data: 0.0278 max mem: 9377 +Train: [5] [1200/6250] eta: 0:14:39 lr: 0.000125 grad: 0.1650 (0.2191) loss: 0.9205 (0.9225) time: 0.1519 data: 0.0571 max mem: 9377 +Train: [5] [1300/6250] eta: 0:14:15 lr: 0.000125 grad: 0.1652 (0.2167) loss: 0.9202 (0.9223) time: 0.1353 data: 0.0400 max mem: 9377 +Train: [5] [1400/6250] eta: 0:13:53 lr: 0.000125 grad: 0.1993 (0.2155) loss: 0.9229 (0.9221) time: 0.1620 data: 0.0705 max mem: 9377 +Train: [5] [1500/6250] eta: 0:13:32 lr: 0.000125 grad: 0.1834 (0.2137) loss: 0.9167 (0.9219) time: 0.1692 data: 0.0782 max mem: 9377 +Train: [5] [1600/6250] eta: 0:13:09 lr: 0.000125 grad: 0.1860 (0.2120) loss: 0.9210 (0.9216) time: 0.1375 data: 0.0493 max mem: 9377 +Train: [5] [1700/6250] eta: 0:12:48 lr: 0.000125 grad: 0.1878 (0.2106) loss: 0.9165 (0.9213) time: 0.1372 data: 0.0403 max mem: 9377 +Train: [5] [1800/6250] eta: 0:12:26 lr: 0.000125 grad: 0.1725 (0.2095) loss: 0.9127 (0.9210) time: 0.1163 data: 0.0219 max mem: 9377 +Train: [5] [1900/6250] eta: 0:12:06 lr: 0.000125 grad: 0.1889 (0.2082) loss: 0.9135 (0.9207) time: 0.1245 data: 0.0318 max mem: 9377 +Train: [5] [2000/6250] eta: 0:11:45 lr: 0.000125 grad: 0.1538 (0.2063) loss: 0.9121 (0.9204) time: 0.1501 data: 0.0638 max mem: 9377 +Train: [5] [2100/6250] eta: 0:11:28 lr: 0.000125 grad: 0.1727 (0.2048) loss: 0.9183 (0.9201) time: 0.1842 data: 0.1001 max mem: 9377 +Train: [5] [2200/6250] eta: 0:11:09 lr: 0.000125 grad: 0.1625 (0.2040) loss: 0.9192 (0.9199) time: 0.1585 data: 0.0773 max mem: 9377 +Train: [5] [2300/6250] eta: 0:10:52 lr: 0.000125 grad: 0.1823 (0.2028) loss: 0.9127 (0.9195) time: 0.1522 data: 0.0590 max mem: 9377 +Train: [5] [2400/6250] eta: 0:10:34 lr: 0.000125 grad: 0.1657 (0.2025) loss: 0.9138 (0.9192) time: 0.1842 data: 0.0971 max mem: 9377 +Train: [5] [2500/6250] eta: 0:10:15 lr: 0.000125 grad: 0.1535 (0.2016) loss: 0.9126 (0.9189) time: 0.1426 data: 0.0490 max mem: 9377 +Train: [5] [2600/6250] eta: 0:09:58 lr: 0.000125 grad: 0.1684 (0.2013) loss: 0.9154 (0.9186) time: 0.1678 data: 0.0748 max mem: 9377 +Train: [5] [2700/6250] eta: 0:09:41 lr: 0.000125 grad: 0.1556 (0.2016) loss: 0.9045 (0.9183) time: 0.1548 data: 0.0642 max mem: 9377 +Train: [5] [2800/6250] eta: 0:09:24 lr: 0.000125 grad: 0.1685 (0.2009) loss: 0.9143 (0.9180) time: 0.1780 data: 0.0944 max mem: 9377 +Train: [5] [2900/6250] eta: 0:09:07 lr: 0.000125 grad: 0.1714 (0.2001) loss: 0.9087 (0.9177) time: 0.1434 data: 0.0545 max mem: 9377 +Train: [5] [3000/6250] eta: 0:08:50 lr: 0.000125 grad: 0.1953 (0.1995) loss: 0.9091 (0.9174) time: 0.1475 data: 0.0583 max mem: 9377 +Train: [5] [3100/6250] eta: 0:08:33 lr: 0.000125 grad: 0.1726 (0.1988) loss: 0.9060 (0.9171) time: 0.1493 data: 0.0639 max mem: 9377 +Train: [5] [3200/6250] eta: 0:08:17 lr: 0.000125 grad: 0.1499 (0.1975) loss: 0.9080 (0.9169) time: 0.1560 data: 0.0724 max mem: 9377 +Train: [5] [3300/6250] eta: 0:08:00 lr: 0.000125 grad: 0.1663 (0.1975) loss: 0.9059 (0.9165) time: 0.1478 data: 0.0603 max mem: 9377 +Train: [5] [3400/6250] eta: 0:07:44 lr: 0.000125 grad: 0.1664 (0.1971) loss: 0.9057 (0.9163) time: 0.1881 data: 0.0923 max mem: 9377 +Train: [5] [3500/6250] eta: 0:07:27 lr: 0.000125 grad: 0.1686 (0.1963) loss: 0.9058 (0.9159) time: 0.1586 data: 0.0748 max mem: 9377 +Train: [5] [3600/6250] eta: 0:07:11 lr: 0.000125 grad: 0.1507 (0.1961) loss: 0.9012 (0.9155) time: 0.1275 data: 0.0318 max mem: 9377 +Train: [5] [3700/6250] eta: 0:06:54 lr: 0.000125 grad: 0.1761 (0.1957) loss: 0.9054 (0.9152) time: 0.1373 data: 0.0541 max mem: 9377 +Train: [5] [3800/6250] eta: 0:06:38 lr: 0.000125 grad: 0.1665 (0.1951) loss: 0.9084 (0.9150) time: 0.1650 data: 0.0793 max mem: 9377 +Train: [5] [3900/6250] eta: 0:06:23 lr: 0.000125 grad: 0.1712 (0.1949) loss: 0.9070 (0.9146) time: 0.1821 data: 0.1012 max mem: 9377 +Train: [5] [4000/6250] eta: 0:06:06 lr: 0.000125 grad: 0.1884 (0.1946) loss: 0.9027 (0.9144) time: 0.1588 data: 0.0699 max mem: 9377 +Train: [5] [4100/6250] eta: 0:05:50 lr: 0.000125 grad: 0.1459 (0.1939) loss: 0.9010 (0.9141) time: 0.1820 data: 0.1002 max mem: 9377 +Train: [5] [4200/6250] eta: 0:05:33 lr: 0.000125 grad: 0.1529 (0.1934) loss: 0.9040 (0.9138) time: 0.1612 data: 0.0743 max mem: 9377 +Train: [5] [4300/6250] eta: 0:05:17 lr: 0.000125 grad: 0.1741 (0.1930) loss: 0.9065 (0.9136) time: 0.1645 data: 0.0772 max mem: 9377 +Train: [5] [4400/6250] eta: 0:05:00 lr: 0.000125 grad: 0.1571 (0.1929) loss: 0.8980 (0.9133) time: 0.1471 data: 0.0515 max mem: 9377 +Train: [5] [4500/6250] eta: 0:04:43 lr: 0.000125 grad: 0.1831 (0.1927) loss: 0.8967 (0.9131) time: 0.1511 data: 0.0640 max mem: 9377 +Train: [5] [4600/6250] eta: 0:04:27 lr: 0.000125 grad: 0.1722 (0.1922) loss: 0.8988 (0.9127) time: 0.1771 data: 0.0781 max mem: 9377 +Train: [5] [4700/6250] eta: 0:04:10 lr: 0.000125 grad: 0.1647 (0.1920) loss: 0.8906 (0.9124) time: 0.1401 data: 0.0389 max mem: 9377 +Train: [5] [4800/6250] eta: 0:03:54 lr: 0.000125 grad: 0.1714 (0.1915) loss: 0.8987 (0.9121) time: 0.1445 data: 0.0613 max mem: 9377 +Train: [5] [4900/6250] eta: 0:03:38 lr: 0.000125 grad: 0.1795 (0.1912) loss: 0.8976 (0.9117) time: 0.2196 data: 0.1321 max mem: 9377 +Train: [5] [5000/6250] eta: 0:03:21 lr: 0.000125 grad: 0.1694 (0.1910) loss: 0.8921 (0.9114) time: 0.1541 data: 0.0695 max mem: 9377 +Train: [5] [5100/6250] eta: 0:03:05 lr: 0.000125 grad: 0.1626 (0.1908) loss: 0.8931 (0.9110) time: 0.1313 data: 0.0405 max mem: 9377 +Train: [5] [5200/6250] eta: 0:02:49 lr: 0.000125 grad: 0.1856 (0.1906) loss: 0.8931 (0.9107) time: 0.1173 data: 0.0277 max mem: 9377 +Train: [5] [5300/6250] eta: 0:02:33 lr: 0.000125 grad: 0.1808 (0.1904) loss: 0.8858 (0.9103) time: 0.1418 data: 0.0572 max mem: 9377 +Train: [5] [5400/6250] eta: 0:02:16 lr: 0.000125 grad: 0.1863 (0.1902) loss: 0.8931 (0.9099) time: 0.1247 data: 0.0354 max mem: 9377 +Train: [5] [5500/6250] eta: 0:02:00 lr: 0.000125 grad: 0.1495 (0.1901) loss: 0.8884 (0.9096) time: 0.1472 data: 0.0590 max mem: 9377 +Train: [5] [5600/6250] eta: 0:01:44 lr: 0.000125 grad: 0.1720 (0.1898) loss: 0.8886 (0.9092) time: 0.1575 data: 0.0674 max mem: 9377 +Train: [5] [5700/6250] eta: 0:01:28 lr: 0.000125 grad: 0.1552 (0.1896) loss: 0.8924 (0.9089) time: 0.1726 data: 0.0867 max mem: 9377 +Train: [5] [5800/6250] eta: 0:01:12 lr: 0.000125 grad: 0.1736 (0.1893) loss: 0.8923 (0.9086) time: 0.1562 data: 0.0699 max mem: 9377 +Train: [5] [5900/6250] eta: 0:00:56 lr: 0.000125 grad: 0.1731 (0.1890) loss: 0.8874 (0.9082) time: 0.1168 data: 0.0340 max mem: 9377 +Train: [5] [6000/6250] eta: 0:00:40 lr: 0.000125 grad: 0.1565 (0.1887) loss: 0.8856 (0.9079) time: 0.1620 data: 0.0688 max mem: 9377 +Train: [5] [6100/6250] eta: 0:00:24 lr: 0.000125 grad: 0.1801 (0.1885) loss: 0.8809 (0.9075) time: 0.1777 data: 0.0882 max mem: 9377 +Train: [5] [6200/6250] eta: 0:00:08 lr: 0.000125 grad: 0.1404 (0.1882) loss: 0.8851 (0.9072) time: 0.1470 data: 0.0487 max mem: 9377 +Train: [5] [6249/6250] eta: 0:00:00 lr: 0.000125 grad: 0.1747 (0.1880) loss: 0.8838 (0.9070) time: 0.1843 data: 0.0931 max mem: 9377 +Train: [5] Total time: 0:16:52 (0.1619 s / it) +Averaged stats: lr: 0.000125 grad: 0.1747 (0.1880) loss: 0.8838 (0.9070) +Eval (hcp-train-subset): [5] [ 0/62] eta: 0:03:17 loss: 0.8922 (0.8922) time: 3.1819 data: 3.1043 max mem: 9377 +Eval (hcp-train-subset): [5] [61/62] eta: 0:00:00 loss: 0.8947 (0.8941) time: 0.1634 data: 0.1365 max mem: 9377 +Eval (hcp-train-subset): [5] Total time: 0:00:14 (0.2415 s / it) +Averaged stats (hcp-train-subset): loss: 0.8947 (0.8941) +Eval (hcp-val): [5] [ 0/62] eta: 0:03:51 loss: 0.8872 (0.8872) time: 3.7407 data: 3.6590 max mem: 9377 +Eval (hcp-val): [5] [61/62] eta: 0:00:00 loss: 0.8917 (0.8926) time: 0.1535 data: 0.1257 max mem: 9377 +Eval (hcp-val): [5] Total time: 0:00:14 (0.2396 s / it) +Averaged stats (hcp-val): loss: 0.8917 (0.8926) +Eval (nsd-val): [5] [ 0/62] eta: 0:05:33 loss: 0.8487 (0.8487) time: 5.3857 data: 5.3401 max mem: 9377 +Eval (nsd-val): [5] [61/62] eta: 0:00:00 loss: 0.8598 (0.8582) time: 0.1366 data: 0.1101 max mem: 9377 +Eval (nsd-val): [5] Total time: 0:00:14 (0.2266 s / it) +Averaged stats (nsd-val): loss: 0.8598 (0.8582) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-best.pth +Train: [6] [ 0/6250] eta: 11:04:40 lr: 0.000125 grad: 0.1467 (0.1467) loss: 0.8845 (0.8845) time: 6.3809 data: 6.2139 max mem: 9377 +Train: [6] [ 100/6250] eta: 0:23:05 lr: 0.000125 grad: 0.2116 (0.2165) loss: 0.8863 (0.8878) time: 0.1787 data: 0.0874 max mem: 9377 +Train: [6] [ 200/6250] eta: 0:20:04 lr: 0.000125 grad: 0.2046 (0.2157) loss: 0.8852 (0.8861) time: 0.1642 data: 0.0731 max mem: 9377 +Train: [6] [ 300/6250] eta: 0:18:32 lr: 0.000125 grad: 0.1543 (0.2019) loss: 0.8898 (0.8861) time: 0.1796 data: 0.0852 max mem: 9377 +Train: [6] [ 400/6250] eta: 0:17:48 lr: 0.000125 grad: 0.1509 (0.1911) loss: 0.8821 (0.8856) time: 0.1750 data: 0.0884 max mem: 9377 +Train: [6] [ 500/6250] eta: 0:16:46 lr: 0.000125 grad: 0.1410 (0.1888) loss: 0.8833 (0.8853) time: 0.1379 data: 0.0568 max mem: 9377 +Train: [6] [ 600/6250] eta: 0:16:27 lr: 0.000125 grad: 0.1480 (0.1858) loss: 0.8800 (0.8847) time: 0.1507 data: 0.0676 max mem: 9377 +Train: [6] [ 700/6250] eta: 0:16:04 lr: 0.000125 grad: 0.1630 (0.1841) loss: 0.8755 (0.8840) time: 0.1527 data: 0.0372 max mem: 9377 +Train: [6] [ 800/6250] eta: 0:15:46 lr: 0.000125 grad: 0.1612 (0.1819) loss: 0.8795 (0.8835) time: 0.1619 data: 0.0411 max mem: 9377 +Train: [6] [ 900/6250] eta: 0:15:33 lr: 0.000125 grad: 0.1533 (0.1822) loss: 0.8784 (0.8831) time: 0.2026 data: 0.0928 max mem: 9377 +Train: [6] [1000/6250] eta: 0:15:08 lr: 0.000125 grad: 0.1576 (0.1793) loss: 0.8778 (0.8828) time: 0.1725 data: 0.0780 max mem: 9377 +Train: [6] [1100/6250] eta: 0:14:43 lr: 0.000125 grad: 0.1652 (0.1783) loss: 0.8799 (0.8825) time: 0.1419 data: 0.0441 max mem: 9377 +Train: [6] [1200/6250] eta: 0:14:18 lr: 0.000125 grad: 0.1453 (0.1771) loss: 0.8769 (0.8822) time: 0.1410 data: 0.0425 max mem: 9377 +Train: [6] [1300/6250] eta: 0:13:53 lr: 0.000125 grad: 0.1623 (0.1760) loss: 0.8790 (0.8819) time: 0.1531 data: 0.0709 max mem: 9377 +Train: [6] [1400/6250] eta: 0:13:35 lr: 0.000125 grad: 0.1633 (0.1756) loss: 0.8795 (0.8815) time: 0.1532 data: 0.0590 max mem: 9377 +Train: [6] [1500/6250] eta: 0:13:16 lr: 0.000125 grad: 0.1701 (0.1746) loss: 0.8788 (0.8812) time: 0.1600 data: 0.0706 max mem: 9377 +Train: [6] [1600/6250] eta: 0:12:58 lr: 0.000125 grad: 0.1576 (0.1743) loss: 0.8758 (0.8810) time: 0.1626 data: 0.0759 max mem: 9377 +Train: [6] [1700/6250] eta: 0:12:40 lr: 0.000125 grad: 0.1462 (0.1733) loss: 0.8720 (0.8808) time: 0.1325 data: 0.0441 max mem: 9377 +Train: [6] [1800/6250] eta: 0:12:22 lr: 0.000125 grad: 0.1442 (0.1721) loss: 0.8750 (0.8804) time: 0.1660 data: 0.0767 max mem: 9377 +Train: [6] [1900/6250] eta: 0:12:01 lr: 0.000125 grad: 0.1723 (0.1725) loss: 0.8763 (0.8802) time: 0.1547 data: 0.0612 max mem: 9377 +Train: [6] [2000/6250] eta: 0:11:43 lr: 0.000125 grad: 0.1506 (0.1717) loss: 0.8760 (0.8799) time: 0.1582 data: 0.0795 max mem: 9377 +Train: [6] [2100/6250] eta: 0:11:25 lr: 0.000125 grad: 0.1404 (0.1709) loss: 0.8784 (0.8797) time: 0.1639 data: 0.0817 max mem: 9377 +Train: [6] [2200/6250] eta: 0:11:05 lr: 0.000125 grad: 0.1371 (0.1699) loss: 0.8767 (0.8795) time: 0.1535 data: 0.0657 max mem: 9377 +Train: [6] [2300/6250] eta: 0:10:47 lr: 0.000125 grad: 0.1548 (0.1692) loss: 0.8712 (0.8794) time: 0.1575 data: 0.0739 max mem: 9377 +Train: [6] [2400/6250] eta: 0:10:30 lr: 0.000125 grad: 0.1857 (0.1691) loss: 0.8718 (0.8792) time: 0.1456 data: 0.0642 max mem: 9377 +Train: [6] [2500/6250] eta: 0:10:13 lr: 0.000125 grad: 0.1467 (0.1686) loss: 0.8723 (0.8790) time: 0.1669 data: 0.0821 max mem: 9377 +Train: [6] [2600/6250] eta: 0:09:55 lr: 0.000125 grad: 0.1457 (0.1674) loss: 0.8738 (0.8788) time: 0.1367 data: 0.0464 max mem: 9377 +Train: [6] [2700/6250] eta: 0:09:39 lr: 0.000125 grad: 0.1371 (0.1667) loss: 0.8680 (0.8786) time: 0.1430 data: 0.0571 max mem: 9377 +Train: [6] [2800/6250] eta: 0:09:22 lr: 0.000125 grad: 0.1450 (0.1664) loss: 0.8723 (0.8783) time: 0.1373 data: 0.0370 max mem: 9377 +Train: [6] [2900/6250] eta: 0:09:04 lr: 0.000125 grad: 0.1590 (0.1658) loss: 0.8669 (0.8780) time: 0.1601 data: 0.0765 max mem: 9377 +Train: [6] [3000/6250] eta: 0:08:47 lr: 0.000125 grad: 0.1559 (0.1652) loss: 0.8702 (0.8777) time: 0.1409 data: 0.0516 max mem: 9377 +Train: [6] [3100/6250] eta: 0:08:31 lr: 0.000125 grad: 0.1396 (0.1648) loss: 0.8694 (0.8775) time: 0.1564 data: 0.0769 max mem: 9377 +Train: [6] [3200/6250] eta: 0:08:13 lr: 0.000125 grad: 0.1476 (0.1642) loss: 0.8688 (0.8773) time: 0.1606 data: 0.0730 max mem: 9377 +Train: [6] [3300/6250] eta: 0:07:57 lr: 0.000125 grad: 0.1211 (0.1636) loss: 0.8777 (0.8771) time: 0.1496 data: 0.0677 max mem: 9377 +Train: [6] [3400/6250] eta: 0:07:40 lr: 0.000125 grad: 0.1776 (0.1634) loss: 0.8643 (0.8769) time: 0.1529 data: 0.0683 max mem: 9377 +Train: [6] [3500/6250] eta: 0:07:24 lr: 0.000125 grad: 0.1476 (0.1630) loss: 0.8705 (0.8767) time: 0.1450 data: 0.0566 max mem: 9377 +Train: [6] [3600/6250] eta: 0:07:08 lr: 0.000125 grad: 0.1330 (0.1626) loss: 0.8644 (0.8766) time: 0.1818 data: 0.0967 max mem: 9377 +Train: [6] [3700/6250] eta: 0:06:51 lr: 0.000125 grad: 0.1490 (0.1622) loss: 0.8762 (0.8765) time: 0.1734 data: 0.0858 max mem: 9377 +Train: [6] [3800/6250] eta: 0:06:35 lr: 0.000125 grad: 0.1260 (0.1618) loss: 0.8722 (0.8764) time: 0.1735 data: 0.0880 max mem: 9377 +Train: [6] [3900/6250] eta: 0:06:19 lr: 0.000125 grad: 0.1585 (0.1614) loss: 0.8685 (0.8762) time: 0.1459 data: 0.0584 max mem: 9377 +Train: [6] [4000/6250] eta: 0:06:03 lr: 0.000125 grad: 0.1355 (0.1609) loss: 0.8656 (0.8761) time: 0.1271 data: 0.0446 max mem: 9377 +Train: [6] [4100/6250] eta: 0:05:46 lr: 0.000125 grad: 0.1517 (0.1607) loss: 0.8687 (0.8758) time: 0.1630 data: 0.0785 max mem: 9377 +Train: [6] [4200/6250] eta: 0:05:30 lr: 0.000125 grad: 0.1358 (0.1605) loss: 0.8709 (0.8756) time: 0.1398 data: 0.0470 max mem: 9377 +Train: [6] [4300/6250] eta: 0:05:14 lr: 0.000125 grad: 0.1391 (0.1604) loss: 0.8635 (0.8755) time: 0.1933 data: 0.1062 max mem: 9377 +Train: [6] [4400/6250] eta: 0:04:58 lr: 0.000125 grad: 0.1347 (0.1601) loss: 0.8718 (0.8753) time: 0.1640 data: 0.0751 max mem: 9377 +Train: [6] [4500/6250] eta: 0:04:41 lr: 0.000125 grad: 0.1144 (0.1596) loss: 0.8696 (0.8752) time: 0.1653 data: 0.0745 max mem: 9377 +Train: [6] [4600/6250] eta: 0:04:25 lr: 0.000125 grad: 0.1381 (0.1593) loss: 0.8636 (0.8750) time: 0.1729 data: 0.0864 max mem: 9377 +Train: [6] [4700/6250] eta: 0:04:09 lr: 0.000125 grad: 0.1306 (0.1591) loss: 0.8660 (0.8748) time: 0.1106 data: 0.0161 max mem: 9377 +Train: [6] [4800/6250] eta: 0:03:52 lr: 0.000125 grad: 0.1438 (0.1589) loss: 0.8713 (0.8746) time: 0.1531 data: 0.0637 max mem: 9377 +Train: [6] [4900/6250] eta: 0:03:36 lr: 0.000125 grad: 0.1302 (0.1586) loss: 0.8698 (0.8745) time: 0.1734 data: 0.0948 max mem: 9377 +Train: [6] [5000/6250] eta: 0:03:20 lr: 0.000125 grad: 0.1178 (0.1581) loss: 0.8645 (0.8743) time: 0.1504 data: 0.0586 max mem: 9377 +Train: [6] [5100/6250] eta: 0:03:04 lr: 0.000125 grad: 0.1370 (0.1579) loss: 0.8690 (0.8741) time: 0.1762 data: 0.0879 max mem: 9377 +Train: [6] [5200/6250] eta: 0:02:48 lr: 0.000125 grad: 0.1230 (0.1575) loss: 0.8639 (0.8739) time: 0.1570 data: 0.0677 max mem: 9377 +Train: [6] [5300/6250] eta: 0:02:31 lr: 0.000125 grad: 0.2008 (0.1575) loss: 0.8646 (0.8736) time: 0.1570 data: 0.0679 max mem: 9377 +Train: [6] [5400/6250] eta: 0:02:15 lr: 0.000125 grad: 0.1477 (0.1573) loss: 0.8665 (0.8734) time: 0.1828 data: 0.0928 max mem: 9377 +Train: [6] [5500/6250] eta: 0:01:59 lr: 0.000125 grad: 0.1363 (0.1572) loss: 0.8643 (0.8732) time: 0.1597 data: 0.0721 max mem: 9377 +Train: [6] [5600/6250] eta: 0:01:44 lr: 0.000125 grad: 0.1233 (0.1568) loss: 0.8656 (0.8730) time: 0.1576 data: 0.0630 max mem: 9377 +Train: [6] [5700/6250] eta: 0:01:28 lr: 0.000125 grad: 0.1389 (0.1569) loss: 0.8655 (0.8728) time: 0.1635 data: 0.0724 max mem: 9377 +Train: [6] [5800/6250] eta: 0:01:12 lr: 0.000125 grad: 0.1176 (0.1566) loss: 0.8605 (0.8725) time: 0.1342 data: 0.0368 max mem: 9377 +Train: [6] [5900/6250] eta: 0:00:56 lr: 0.000125 grad: 0.1646 (0.1565) loss: 0.8577 (0.8723) time: 0.1486 data: 0.0537 max mem: 9377 +Train: [6] [6000/6250] eta: 0:00:40 lr: 0.000125 grad: 0.1327 (0.1562) loss: 0.8627 (0.8721) time: 0.1998 data: 0.1111 max mem: 9377 +Train: [6] [6100/6250] eta: 0:00:24 lr: 0.000125 grad: 0.1329 (0.1559) loss: 0.8544 (0.8718) time: 0.1573 data: 0.0642 max mem: 9377 +Train: [6] [6200/6250] eta: 0:00:08 lr: 0.000125 grad: 0.1276 (0.1557) loss: 0.8581 (0.8716) time: 0.1678 data: 0.0772 max mem: 9377 +Train: [6] [6249/6250] eta: 0:00:00 lr: 0.000125 grad: 0.1370 (0.1557) loss: 0.8551 (0.8715) time: 0.1974 data: 0.1111 max mem: 9377 +Train: [6] Total time: 0:16:50 (0.1617 s / it) +Averaged stats: lr: 0.000125 grad: 0.1370 (0.1557) loss: 0.8551 (0.8715) +Eval (hcp-train-subset): [6] [ 0/62] eta: 0:05:23 loss: 0.8767 (0.8767) time: 5.2126 data: 5.1818 max mem: 9377 +Eval (hcp-train-subset): [6] [61/62] eta: 0:00:00 loss: 0.8772 (0.8791) time: 0.1473 data: 0.1210 max mem: 9377 +Eval (hcp-train-subset): [6] Total time: 0:00:14 (0.2396 s / it) +Averaged stats (hcp-train-subset): loss: 0.8772 (0.8791) +Eval (hcp-val): [6] [ 0/62] eta: 0:05:16 loss: 0.8775 (0.8775) time: 5.1089 data: 5.0790 max mem: 9377 +Eval (hcp-val): [6] [61/62] eta: 0:00:00 loss: 0.8756 (0.8774) time: 0.1442 data: 0.1193 max mem: 9377 +Eval (hcp-val): [6] Total time: 0:00:14 (0.2326 s / it) +Averaged stats (hcp-val): loss: 0.8756 (0.8774) +Eval (nsd-val): [6] [ 0/62] eta: 0:05:19 loss: 0.8397 (0.8397) time: 5.1468 data: 5.1042 max mem: 9377 +Eval (nsd-val): [6] [61/62] eta: 0:00:00 loss: 0.8444 (0.8454) time: 0.1225 data: 0.0967 max mem: 9377 +Eval (nsd-val): [6] Total time: 0:00:13 (0.2215 s / it) +Averaged stats (nsd-val): loss: 0.8444 (0.8454) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-best.pth +Train: [7] [ 0/6250] eta: 12:20:46 lr: 0.000125 grad: 0.3818 (0.3818) loss: 0.7697 (0.7697) time: 7.1114 data: 7.0129 max mem: 9377 +Train: [7] [ 100/6250] eta: 0:23:32 lr: 0.000125 grad: 0.1320 (0.1758) loss: 0.8678 (0.8639) time: 0.1639 data: 0.0702 max mem: 9377 +Train: [7] [ 200/6250] eta: 0:21:09 lr: 0.000125 grad: 0.1154 (0.1547) loss: 0.8709 (0.8613) time: 0.2082 data: 0.1196 max mem: 9377 +Train: [7] [ 300/6250] eta: 0:19:39 lr: 0.000125 grad: 0.1330 (0.1503) loss: 0.8638 (0.8619) time: 0.1841 data: 0.0961 max mem: 9377 +Train: [7] [ 400/6250] eta: 0:18:55 lr: 0.000125 grad: 0.1294 (0.1461) loss: 0.8608 (0.8618) time: 0.1719 data: 0.0847 max mem: 9377 +Train: [7] [ 500/6250] eta: 0:18:00 lr: 0.000125 grad: 0.1438 (0.1439) loss: 0.8668 (0.8623) time: 0.1524 data: 0.0663 max mem: 9377 +Train: [7] [ 600/6250] eta: 0:17:23 lr: 0.000125 grad: 0.1183 (0.1420) loss: 0.8551 (0.8619) time: 0.1626 data: 0.0757 max mem: 9377 +Train: [7] [ 700/6250] eta: 0:16:45 lr: 0.000125 grad: 0.1270 (0.1410) loss: 0.8630 (0.8615) time: 0.1753 data: 0.0862 max mem: 9377 +Train: [7] [ 800/6250] eta: 0:16:27 lr: 0.000125 grad: 0.1105 (0.1407) loss: 0.8614 (0.8611) time: 0.1911 data: 0.0844 max mem: 9377 +Train: [7] [ 900/6250] eta: 0:16:12 lr: 0.000125 grad: 0.1370 (0.1401) loss: 0.8562 (0.8607) time: 0.1784 data: 0.0692 max mem: 9377 +Train: [7] [1000/6250] eta: 0:15:40 lr: 0.000125 grad: 0.1244 (0.1398) loss: 0.8568 (0.8601) time: 0.1524 data: 0.0445 max mem: 9377 +Train: [7] [1100/6250] eta: 0:15:10 lr: 0.000125 grad: 0.1195 (0.1395) loss: 0.8578 (0.8596) time: 0.1753 data: 0.0863 max mem: 9377 +Train: [7] [1200/6250] eta: 0:14:43 lr: 0.000125 grad: 0.1218 (0.1389) loss: 0.8551 (0.8594) time: 0.1635 data: 0.0768 max mem: 9377 +Train: [7] [1300/6250] eta: 0:14:15 lr: 0.000125 grad: 0.1368 (0.1388) loss: 0.8497 (0.8590) time: 0.1447 data: 0.0567 max mem: 9377 +Train: [7] [1400/6250] eta: 0:13:51 lr: 0.000125 grad: 0.1486 (0.1388) loss: 0.8581 (0.8585) time: 0.1362 data: 0.0434 max mem: 9377 +Train: [7] [1500/6250] eta: 0:13:32 lr: 0.000125 grad: 0.1429 (0.1392) loss: 0.8535 (0.8582) time: 0.1698 data: 0.0717 max mem: 9377 +Train: [7] [1600/6250] eta: 0:13:17 lr: 0.000125 grad: 0.1261 (0.1390) loss: 0.8542 (0.8579) time: 0.1899 data: 0.0931 max mem: 9377 +Train: [7] [1700/6250] eta: 0:12:58 lr: 0.000125 grad: 0.1262 (0.1395) loss: 0.8497 (0.8574) time: 0.1616 data: 0.0669 max mem: 9377 +Train: [7] [1800/6250] eta: 0:12:39 lr: 0.000125 grad: 0.1235 (0.1395) loss: 0.8592 (0.8573) time: 0.1507 data: 0.0636 max mem: 9377 +Train: [7] [1900/6250] eta: 0:12:19 lr: 0.000125 grad: 0.1185 (0.1389) loss: 0.8581 (0.8571) time: 0.1535 data: 0.0655 max mem: 9377 +Train: [7] [2000/6250] eta: 0:12:01 lr: 0.000125 grad: 0.1237 (0.1386) loss: 0.8564 (0.8570) time: 0.1611 data: 0.0711 max mem: 9377 +Train: [7] [2100/6250] eta: 0:11:42 lr: 0.000125 grad: 0.1349 (0.1387) loss: 0.8518 (0.8568) time: 0.1646 data: 0.0741 max mem: 9377 +Train: [7] [2200/6250] eta: 0:11:24 lr: 0.000125 grad: 0.1462 (0.1385) loss: 0.8470 (0.8565) time: 0.1431 data: 0.0476 max mem: 9377 +Train: [7] [2300/6250] eta: 0:11:05 lr: 0.000125 grad: 0.1254 (0.1390) loss: 0.8443 (0.8562) time: 0.1562 data: 0.0662 max mem: 9377 +Train: [7] [2400/6250] eta: 0:10:47 lr: 0.000125 grad: 0.1229 (0.1388) loss: 0.8500 (0.8558) time: 0.1806 data: 0.0885 max mem: 9377 +Train: [7] [2500/6250] eta: 0:10:28 lr: 0.000125 grad: 0.1284 (0.1384) loss: 0.8527 (0.8556) time: 0.1544 data: 0.0733 max mem: 9377 +Train: [7] [2600/6250] eta: 0:10:11 lr: 0.000125 grad: 0.1284 (0.1384) loss: 0.8492 (0.8554) time: 0.1701 data: 0.0849 max mem: 9377 +Train: [7] [2700/6250] eta: 0:09:52 lr: 0.000125 grad: 0.1214 (0.1381) loss: 0.8483 (0.8552) time: 0.1635 data: 0.0690 max mem: 9377 +Train: [7] [2800/6250] eta: 0:09:34 lr: 0.000125 grad: 0.1302 (0.1382) loss: 0.8521 (0.8550) time: 0.1688 data: 0.0802 max mem: 9377 +Train: [7] [2900/6250] eta: 0:09:16 lr: 0.000125 grad: 0.1305 (0.1383) loss: 0.8507 (0.8548) time: 0.1510 data: 0.0618 max mem: 9377 +Train: [7] [3000/6250] eta: 0:08:58 lr: 0.000125 grad: 0.1309 (0.1381) loss: 0.8506 (0.8546) time: 0.1559 data: 0.0672 max mem: 9377 +Train: [7] [3100/6250] eta: 0:08:40 lr: 0.000125 grad: 0.1227 (0.1381) loss: 0.8513 (0.8545) time: 0.1480 data: 0.0664 max mem: 9377 +Train: [7] [3200/6250] eta: 0:08:24 lr: 0.000125 grad: 0.1180 (0.1380) loss: 0.8513 (0.8543) time: 0.1806 data: 0.0969 max mem: 9377 +Train: [7] [3300/6250] eta: 0:08:07 lr: 0.000125 grad: 0.1474 (0.1382) loss: 0.8490 (0.8541) time: 0.1101 data: 0.0121 max mem: 9377 +Train: [7] [3400/6250] eta: 0:07:50 lr: 0.000125 grad: 0.1242 (0.1381) loss: 0.8409 (0.8539) time: 0.1662 data: 0.0799 max mem: 9377 +Train: [7] [3500/6250] eta: 0:07:33 lr: 0.000125 grad: 0.1400 (0.1382) loss: 0.8516 (0.8539) time: 0.1687 data: 0.0740 max mem: 9377 +Train: [7] [3600/6250] eta: 0:07:16 lr: 0.000125 grad: 0.1228 (0.1381) loss: 0.8458 (0.8537) time: 0.1609 data: 0.0694 max mem: 9377 +Train: [7] [3700/6250] eta: 0:06:59 lr: 0.000125 grad: 0.1179 (0.1378) loss: 0.8503 (0.8536) time: 0.1709 data: 0.0875 max mem: 9377 +Train: [7] [3800/6250] eta: 0:06:43 lr: 0.000125 grad: 0.1191 (0.1378) loss: 0.8545 (0.8535) time: 0.2228 data: 0.1341 max mem: 9377 +Train: [7] [3900/6250] eta: 0:06:26 lr: 0.000125 grad: 0.1307 (0.1377) loss: 0.8484 (0.8533) time: 0.1553 data: 0.0639 max mem: 9377 +Train: [7] [4000/6250] eta: 0:06:09 lr: 0.000125 grad: 0.1316 (0.1375) loss: 0.8402 (0.8531) time: 0.1520 data: 0.0571 max mem: 9377 +Train: [7] [4100/6250] eta: 0:05:53 lr: 0.000125 grad: 0.1194 (0.1373) loss: 0.8419 (0.8530) time: 0.1727 data: 0.0802 max mem: 9377 +Train: [7] [4200/6250] eta: 0:05:37 lr: 0.000125 grad: 0.1308 (0.1374) loss: 0.8463 (0.8529) time: 0.1757 data: 0.0909 max mem: 9377 +Train: [7] [4300/6250] eta: 0:05:20 lr: 0.000125 grad: 0.1239 (0.1374) loss: 0.8492 (0.8527) time: 0.1546 data: 0.0565 max mem: 9377 +Train: [7] [4400/6250] eta: 0:05:03 lr: 0.000125 grad: 0.1291 (0.1374) loss: 0.8477 (0.8526) time: 0.1640 data: 0.0842 max mem: 9377 +Train: [7] [4500/6250] eta: 0:04:47 lr: 0.000125 grad: 0.1257 (0.1373) loss: 0.8523 (0.8525) time: 0.1733 data: 0.0870 max mem: 9377 +Train: [7] [4600/6250] eta: 0:04:30 lr: 0.000125 grad: 0.1336 (0.1373) loss: 0.8504 (0.8524) time: 0.1553 data: 0.0710 max mem: 9377 +Train: [7] [4700/6250] eta: 0:04:13 lr: 0.000125 grad: 0.1142 (0.1371) loss: 0.8557 (0.8524) time: 0.1435 data: 0.0560 max mem: 9377 +Train: [7] [4800/6250] eta: 0:03:57 lr: 0.000125 grad: 0.1186 (0.1369) loss: 0.8458 (0.8523) time: 0.1357 data: 0.0460 max mem: 9377 +Train: [7] [4900/6250] eta: 0:03:41 lr: 0.000125 grad: 0.1278 (0.1368) loss: 0.8420 (0.8521) time: 0.1411 data: 0.0525 max mem: 9377 +Train: [7] [5000/6250] eta: 0:03:24 lr: 0.000125 grad: 0.1321 (0.1366) loss: 0.8390 (0.8520) time: 0.1710 data: 0.0828 max mem: 9377 +Train: [7] [5100/6250] eta: 0:03:08 lr: 0.000125 grad: 0.1169 (0.1365) loss: 0.8502 (0.8518) time: 0.1285 data: 0.0316 max mem: 9377 +Train: [7] [5200/6250] eta: 0:02:51 lr: 0.000125 grad: 0.1324 (0.1364) loss: 0.8445 (0.8516) time: 0.1782 data: 0.0963 max mem: 9377 +Train: [7] [5300/6250] eta: 0:02:35 lr: 0.000125 grad: 0.1141 (0.1364) loss: 0.8480 (0.8514) time: 0.1520 data: 0.0702 max mem: 9377 +Train: [7] [5400/6250] eta: 0:02:18 lr: 0.000125 grad: 0.1140 (0.1363) loss: 0.8466 (0.8513) time: 0.1686 data: 0.0839 max mem: 9377 +Train: [7] [5500/6250] eta: 0:02:02 lr: 0.000125 grad: 0.1217 (0.1363) loss: 0.8432 (0.8511) time: 0.1130 data: 0.0176 max mem: 9377 +Train: [7] [5600/6250] eta: 0:01:46 lr: 0.000125 grad: 0.1117 (0.1363) loss: 0.8503 (0.8511) time: 0.1611 data: 0.0735 max mem: 9377 +Train: [7] [5700/6250] eta: 0:01:29 lr: 0.000125 grad: 0.1117 (0.1360) loss: 0.8446 (0.8510) time: 0.1761 data: 0.0791 max mem: 9377 +Train: [7] [5800/6250] eta: 0:01:13 lr: 0.000125 grad: 0.1328 (0.1361) loss: 0.8403 (0.8509) time: 0.2013 data: 0.1172 max mem: 9377 +Train: [7] [5900/6250] eta: 0:00:57 lr: 0.000125 grad: 0.1331 (0.1360) loss: 0.8408 (0.8508) time: 0.1799 data: 0.0943 max mem: 9377 +Train: [7] [6000/6250] eta: 0:00:40 lr: 0.000125 grad: 0.1152 (0.1358) loss: 0.8428 (0.8506) time: 0.1520 data: 0.0610 max mem: 9377 +Train: [7] [6100/6250] eta: 0:00:24 lr: 0.000125 grad: 0.1202 (0.1357) loss: 0.8349 (0.8505) time: 0.1560 data: 0.0706 max mem: 9377 +Train: [7] [6200/6250] eta: 0:00:08 lr: 0.000125 grad: 0.1093 (0.1356) loss: 0.8395 (0.8503) time: 0.1756 data: 0.0854 max mem: 9377 +Train: [7] [6249/6250] eta: 0:00:00 lr: 0.000125 grad: 0.1180 (0.1356) loss: 0.8413 (0.8502) time: 0.2061 data: 0.1247 max mem: 9377 +Train: [7] Total time: 0:17:09 (0.1647 s / it) +Averaged stats: lr: 0.000125 grad: 0.1180 (0.1356) loss: 0.8413 (0.8502) +Eval (hcp-train-subset): [7] [ 0/62] eta: 0:04:50 loss: 0.8722 (0.8722) time: 4.6799 data: 4.5877 max mem: 9377 +Eval (hcp-train-subset): [7] [61/62] eta: 0:00:00 loss: 0.8720 (0.8715) time: 0.1225 data: 0.0973 max mem: 9377 +Eval (hcp-train-subset): [7] Total time: 0:00:14 (0.2411 s / it) +Averaged stats (hcp-train-subset): loss: 0.8720 (0.8715) +Eval (hcp-val): [7] [ 0/62] eta: 0:05:37 loss: 0.8664 (0.8664) time: 5.4497 data: 5.4185 max mem: 9377 +Eval (hcp-val): [7] [61/62] eta: 0:00:00 loss: 0.8680 (0.8694) time: 0.1433 data: 0.1182 max mem: 9377 +Eval (hcp-val): [7] Total time: 0:00:15 (0.2421 s / it) +Averaged stats (hcp-val): loss: 0.8680 (0.8694) +Eval (nsd-val): [7] [ 0/62] eta: 0:06:06 loss: 0.8299 (0.8299) time: 5.9058 data: 5.8730 max mem: 9377 +Eval (nsd-val): [7] [61/62] eta: 0:00:00 loss: 0.8404 (0.8400) time: 0.1220 data: 0.0954 max mem: 9377 +Eval (nsd-val): [7] Total time: 0:00:14 (0.2308 s / it) +Averaged stats (nsd-val): loss: 0.8404 (0.8400) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-best.pth +Train: [8] [ 0/6250] eta: 10:04:16 lr: 0.000125 grad: 0.0765 (0.0765) loss: 0.8852 (0.8852) time: 5.8011 data: 5.5688 max mem: 9377 +Train: [8] [ 100/6250] eta: 0:24:01 lr: 0.000125 grad: 0.1355 (0.1662) loss: 0.8428 (0.8498) time: 0.1817 data: 0.0864 max mem: 9377 +Train: [8] [ 200/6250] eta: 0:20:53 lr: 0.000125 grad: 0.1392 (0.1611) loss: 0.8404 (0.8415) time: 0.1806 data: 0.0759 max mem: 9377 +Train: [8] [ 300/6250] eta: 0:19:42 lr: 0.000125 grad: 0.1152 (0.1547) loss: 0.8425 (0.8409) time: 0.1545 data: 0.0676 max mem: 9377 +Train: [8] [ 400/6250] eta: 0:19:01 lr: 0.000125 grad: 0.1215 (0.1484) loss: 0.8378 (0.8407) time: 0.1853 data: 0.0798 max mem: 9377 +Train: [8] [ 500/6250] eta: 0:18:26 lr: 0.000125 grad: 0.1350 (0.1468) loss: 0.8230 (0.8402) time: 0.1650 data: 0.0743 max mem: 9377 +Train: [8] [ 600/6250] eta: 0:17:44 lr: 0.000125 grad: 0.1264 (0.1446) loss: 0.8384 (0.8398) time: 0.1854 data: 0.0865 max mem: 9377 +Train: [8] [ 700/6250] eta: 0:17:13 lr: 0.000125 grad: 0.1109 (0.1422) loss: 0.8491 (0.8399) time: 0.1456 data: 0.0478 max mem: 9377 +Train: [8] [ 800/6250] eta: 0:16:50 lr: 0.000125 grad: 0.1162 (0.1405) loss: 0.8449 (0.8404) time: 0.1946 data: 0.1033 max mem: 9377 +Train: [8] [ 900/6250] eta: 0:16:27 lr: 0.000125 grad: 0.1261 (0.1389) loss: 0.8442 (0.8407) time: 0.1852 data: 0.0855 max mem: 9377 +Train: [8] [1000/6250] eta: 0:16:05 lr: 0.000125 grad: 0.1162 (0.1374) loss: 0.8410 (0.8411) time: 0.1809 data: 0.0833 max mem: 9377 +Train: [8] [1100/6250] eta: 0:15:37 lr: 0.000125 grad: 0.1345 (0.1377) loss: 0.8498 (0.8413) time: 0.1538 data: 0.0574 max mem: 9377 +Train: [8] [1200/6250] eta: 0:15:10 lr: 0.000125 grad: 0.1297 (0.1375) loss: 0.8334 (0.8408) time: 0.1566 data: 0.0663 max mem: 9377 +Train: [8] [1300/6250] eta: 0:14:43 lr: 0.000125 grad: 0.1482 (0.1372) loss: 0.8301 (0.8404) time: 0.1662 data: 0.0789 max mem: 9377 +Train: [8] [1400/6250] eta: 0:14:19 lr: 0.000125 grad: 0.1266 (0.1369) loss: 0.8384 (0.8398) time: 0.1698 data: 0.0753 max mem: 9377 +Train: [8] [1500/6250] eta: 0:13:54 lr: 0.000125 grad: 0.1288 (0.1365) loss: 0.8383 (0.8394) time: 0.1369 data: 0.0574 max mem: 9377 +Train: [8] [1600/6250] eta: 0:13:30 lr: 0.000125 grad: 0.1239 (0.1364) loss: 0.8347 (0.8389) time: 0.1557 data: 0.0627 max mem: 9377 +Train: [8] [1700/6250] eta: 0:13:09 lr: 0.000125 grad: 0.1232 (0.1359) loss: 0.8380 (0.8387) time: 0.1612 data: 0.0725 max mem: 9377 +Train: [8] [1800/6250] eta: 0:12:50 lr: 0.000125 grad: 0.1247 (0.1356) loss: 0.8447 (0.8386) time: 0.1462 data: 0.0587 max mem: 9377 +Train: [8] [1900/6250] eta: 0:12:30 lr: 0.000125 grad: 0.1163 (0.1356) loss: 0.8320 (0.8382) time: 0.1220 data: 0.0265 max mem: 9377 +Train: [8] [2000/6250] eta: 0:12:13 lr: 0.000125 grad: 0.1122 (0.1358) loss: 0.8363 (0.8378) time: 0.1374 data: 0.0511 max mem: 9377 +Train: [8] [2100/6250] eta: 0:11:54 lr: 0.000125 grad: 0.1277 (0.1356) loss: 0.8318 (0.8376) time: 0.1489 data: 0.0502 max mem: 9377 +Train: [8] [2200/6250] eta: 0:11:33 lr: 0.000125 grad: 0.1344 (0.1362) loss: 0.8195 (0.8372) time: 0.1398 data: 0.0444 max mem: 9377 +Train: [8] [2300/6250] eta: 0:11:13 lr: 0.000125 grad: 0.1223 (0.1365) loss: 0.8368 (0.8371) time: 0.1624 data: 0.0679 max mem: 9377 +Train: [8] [2400/6250] eta: 0:10:53 lr: 0.000125 grad: 0.1294 (0.1363) loss: 0.8510 (0.8369) time: 0.1526 data: 0.0651 max mem: 9377 +Train: [8] [2500/6250] eta: 0:10:39 lr: 0.000125 grad: 0.1236 (0.1362) loss: 0.8315 (0.8369) time: 0.1822 data: 0.0853 max mem: 9377 +Train: [8] [2600/6250] eta: 0:10:19 lr: 0.000125 grad: 0.1192 (0.1362) loss: 0.8359 (0.8368) time: 0.1558 data: 0.0694 max mem: 9377 +Train: [8] [2700/6250] eta: 0:10:00 lr: 0.000125 grad: 0.1274 (0.1363) loss: 0.8419 (0.8368) time: 0.1759 data: 0.0975 max mem: 9377 +Train: [8] [2800/6250] eta: 0:09:41 lr: 0.000125 grad: 0.1107 (0.1360) loss: 0.8368 (0.8367) time: 0.1389 data: 0.0527 max mem: 9377 +Train: [8] [2900/6250] eta: 0:09:23 lr: 0.000125 grad: 0.1294 (0.1359) loss: 0.8326 (0.8366) time: 0.1483 data: 0.0673 max mem: 9377 +Train: [8] [3000/6250] eta: 0:09:05 lr: 0.000125 grad: 0.1150 (0.1358) loss: 0.8402 (0.8366) time: 0.1594 data: 0.0766 max mem: 9377 +Train: [8] [3100/6250] eta: 0:08:47 lr: 0.000125 grad: 0.1233 (0.1355) loss: 0.8407 (0.8365) time: 0.1493 data: 0.0617 max mem: 9377 +Train: [8] [3200/6250] eta: 0:08:29 lr: 0.000125 grad: 0.1436 (0.1358) loss: 0.8271 (0.8364) time: 0.1538 data: 0.0643 max mem: 9377 +Train: [8] [3300/6250] eta: 0:08:11 lr: 0.000125 grad: 0.1225 (0.1356) loss: 0.8343 (0.8364) time: 0.1741 data: 0.0920 max mem: 9377 +Train: [8] [3400/6250] eta: 0:07:54 lr: 0.000125 grad: 0.1210 (0.1353) loss: 0.8257 (0.8363) time: 0.1151 data: 0.0297 max mem: 9377 +Train: [8] [3500/6250] eta: 0:07:36 lr: 0.000125 grad: 0.1178 (0.1351) loss: 0.8430 (0.8362) time: 0.1649 data: 0.0836 max mem: 9377 +Train: [8] [3600/6250] eta: 0:07:20 lr: 0.000125 grad: 0.1117 (0.1348) loss: 0.8391 (0.8361) time: 0.1500 data: 0.0618 max mem: 9377 +Train: [8] [3700/6250] eta: 0:07:03 lr: 0.000125 grad: 0.1194 (0.1347) loss: 0.8419 (0.8361) time: 0.1752 data: 0.0843 max mem: 9377 +Train: [8] [3800/6250] eta: 0:06:46 lr: 0.000125 grad: 0.1159 (0.1345) loss: 0.8374 (0.8361) time: 0.1814 data: 0.0960 max mem: 9377 +Train: [8] [3900/6250] eta: 0:06:29 lr: 0.000125 grad: 0.1231 (0.1342) loss: 0.8317 (0.8361) time: 0.1494 data: 0.0604 max mem: 9377 +Train: [8] [4000/6250] eta: 0:06:12 lr: 0.000125 grad: 0.1281 (0.1343) loss: 0.8365 (0.8360) time: 0.1512 data: 0.0517 max mem: 9377 +Train: [8] [4100/6250] eta: 0:05:56 lr: 0.000125 grad: 0.1259 (0.1340) loss: 0.8319 (0.8360) time: 0.1524 data: 0.0650 max mem: 9377 +Train: [8] [4200/6250] eta: 0:05:40 lr: 0.000125 grad: 0.1252 (0.1338) loss: 0.8391 (0.8361) time: 0.1249 data: 0.0242 max mem: 9377 +Train: [8] [4300/6250] eta: 0:05:23 lr: 0.000125 grad: 0.1162 (0.1338) loss: 0.8408 (0.8361) time: 0.1545 data: 0.0657 max mem: 9377 +Train: [8] [4400/6250] eta: 0:05:06 lr: 0.000125 grad: 0.1258 (0.1336) loss: 0.8436 (0.8361) time: 0.1614 data: 0.0753 max mem: 9377 +Train: [8] [4500/6250] eta: 0:04:49 lr: 0.000125 grad: 0.1149 (0.1334) loss: 0.8464 (0.8361) time: 0.1424 data: 0.0543 max mem: 9377 +Train: [8] [4600/6250] eta: 0:04:32 lr: 0.000125 grad: 0.1424 (0.1334) loss: 0.8328 (0.8361) time: 0.1473 data: 0.0624 max mem: 9377 +Train: [8] [4700/6250] eta: 0:04:15 lr: 0.000125 grad: 0.1176 (0.1334) loss: 0.8380 (0.8361) time: 0.1534 data: 0.0621 max mem: 9377 +Train: [8] [4800/6250] eta: 0:03:58 lr: 0.000125 grad: 0.1320 (0.1334) loss: 0.8364 (0.8361) time: 0.1427 data: 0.0523 max mem: 9377 +Train: [8] [4900/6250] eta: 0:03:42 lr: 0.000125 grad: 0.1160 (0.1333) loss: 0.8388 (0.8360) time: 0.1591 data: 0.0677 max mem: 9377 +Train: [8] [5000/6250] eta: 0:03:25 lr: 0.000125 grad: 0.1204 (0.1333) loss: 0.8336 (0.8360) time: 0.1623 data: 0.0785 max mem: 9377 +Train: [8] [5100/6250] eta: 0:03:08 lr: 0.000125 grad: 0.1152 (0.1333) loss: 0.8372 (0.8359) time: 0.1352 data: 0.0557 max mem: 9377 +Train: [8] [5200/6250] eta: 0:02:52 lr: 0.000124 grad: 0.1259 (0.1333) loss: 0.8384 (0.8359) time: 0.1637 data: 0.0802 max mem: 9377 +Train: [8] [5300/6250] eta: 0:02:35 lr: 0.000124 grad: 0.1271 (0.1331) loss: 0.8373 (0.8358) time: 0.1685 data: 0.0844 max mem: 9377 +Train: [8] [5400/6250] eta: 0:02:19 lr: 0.000124 grad: 0.1268 (0.1331) loss: 0.8331 (0.8358) time: 0.2327 data: 0.1437 max mem: 9377 +Train: [8] [5500/6250] eta: 0:02:02 lr: 0.000124 grad: 0.1237 (0.1333) loss: 0.8238 (0.8356) time: 0.1453 data: 0.0554 max mem: 9377 +Train: [8] [5600/6250] eta: 0:01:46 lr: 0.000124 grad: 0.1148 (0.1332) loss: 0.8406 (0.8356) time: 0.1705 data: 0.0824 max mem: 9377 +Train: [8] [5700/6250] eta: 0:01:30 lr: 0.000124 grad: 0.1165 (0.1331) loss: 0.8324 (0.8355) time: 0.1366 data: 0.0329 max mem: 9377 +Train: [8] [5800/6250] eta: 0:01:13 lr: 0.000124 grad: 0.1204 (0.1331) loss: 0.8260 (0.8355) time: 0.1313 data: 0.0415 max mem: 9377 +Train: [8] [5900/6250] eta: 0:00:57 lr: 0.000124 grad: 0.1196 (0.1330) loss: 0.8391 (0.8354) time: 0.1318 data: 0.0532 max mem: 9377 +Train: [8] [6000/6250] eta: 0:00:40 lr: 0.000124 grad: 0.1165 (0.1329) loss: 0.8366 (0.8354) time: 0.1623 data: 0.0760 max mem: 9377 +Train: [8] [6100/6250] eta: 0:00:24 lr: 0.000124 grad: 0.1167 (0.1329) loss: 0.8371 (0.8353) time: 0.1499 data: 0.0623 max mem: 9377 +Train: [8] [6200/6250] eta: 0:00:08 lr: 0.000124 grad: 0.1252 (0.1327) loss: 0.8266 (0.8353) time: 0.1575 data: 0.0672 max mem: 9377 +Train: [8] [6249/6250] eta: 0:00:00 lr: 0.000124 grad: 0.1325 (0.1328) loss: 0.8361 (0.8352) time: 0.1869 data: 0.1041 max mem: 9377 +Train: [8] Total time: 0:17:10 (0.1649 s / it) +Averaged stats: lr: 0.000124 grad: 0.1325 (0.1328) loss: 0.8361 (0.8352) +Eval (hcp-train-subset): [8] [ 0/62] eta: 0:04:03 loss: 0.8690 (0.8690) time: 3.9301 data: 3.8575 max mem: 9377 +Eval (hcp-train-subset): [8] [61/62] eta: 0:00:00 loss: 0.8710 (0.8695) time: 0.1329 data: 0.1076 max mem: 9377 +Eval (hcp-train-subset): [8] Total time: 0:00:14 (0.2355 s / it) +Averaged stats (hcp-train-subset): loss: 0.8710 (0.8695) +Eval (hcp-val): [8] [ 0/62] eta: 0:04:28 loss: 0.8642 (0.8642) time: 4.3274 data: 4.2433 max mem: 9377 +Eval (hcp-val): [8] [61/62] eta: 0:00:00 loss: 0.8662 (0.8680) time: 0.1340 data: 0.1073 max mem: 9377 +Eval (hcp-val): [8] Total time: 0:00:15 (0.2421 s / it) +Averaged stats (hcp-val): loss: 0.8662 (0.8680) +Eval (nsd-val): [8] [ 0/62] eta: 0:05:40 loss: 0.8264 (0.8264) time: 5.4995 data: 5.4693 max mem: 9377 +Eval (nsd-val): [8] [61/62] eta: 0:00:00 loss: 0.8391 (0.8405) time: 0.1076 data: 0.0812 max mem: 9377 +Eval (nsd-val): [8] Total time: 0:00:13 (0.2214 s / it) +Averaged stats (nsd-val): loss: 0.8391 (0.8405) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-best.pth +Train: [9] [ 0/6250] eta: 9:48:43 lr: 0.000124 grad: 0.1253 (0.1253) loss: 0.8514 (0.8514) time: 5.6517 data: 5.4909 max mem: 9377 +Train: [9] [ 100/6250] eta: 0:21:59 lr: 0.000124 grad: 0.1261 (0.2011) loss: 0.8286 (0.8325) time: 0.1422 data: 0.0584 max mem: 9377 +Train: [9] [ 200/6250] eta: 0:19:08 lr: 0.000124 grad: 0.1285 (0.1709) loss: 0.8297 (0.8313) time: 0.1576 data: 0.0602 max mem: 9377 +Train: [9] [ 300/6250] eta: 0:18:19 lr: 0.000124 grad: 0.1262 (0.1598) loss: 0.8292 (0.8305) time: 0.1912 data: 0.1058 max mem: 9377 +Train: [9] [ 400/6250] eta: 0:17:45 lr: 0.000124 grad: 0.1297 (0.1526) loss: 0.8143 (0.8295) time: 0.1737 data: 0.0748 max mem: 9377 +Train: [9] [ 500/6250] eta: 0:17:31 lr: 0.000124 grad: 0.1305 (0.1489) loss: 0.8241 (0.8289) time: 0.1838 data: 0.0904 max mem: 9377 +Train: [9] [ 600/6250] eta: 0:16:52 lr: 0.000124 grad: 0.1197 (0.1463) loss: 0.8173 (0.8281) time: 0.1629 data: 0.0693 max mem: 9377 +Train: [9] [ 700/6250] eta: 0:16:31 lr: 0.000124 grad: 0.1235 (0.1431) loss: 0.8249 (0.8286) time: 0.1781 data: 0.0887 max mem: 9377 +Train: [9] [ 800/6250] eta: 0:16:00 lr: 0.000124 grad: 0.1293 (0.1429) loss: 0.8258 (0.8284) time: 0.1843 data: 0.0751 max mem: 9377 +Train: [9] [ 900/6250] eta: 0:15:46 lr: 0.000124 grad: 0.1180 (0.1414) loss: 0.8245 (0.8282) time: 0.1844 data: 0.0771 max mem: 9377 +Train: [9] [1000/6250] eta: 0:15:29 lr: 0.000124 grad: 0.1273 (0.1397) loss: 0.8274 (0.8280) time: 0.2011 data: 0.1050 max mem: 9377 +Train: [9] [1100/6250] eta: 0:15:05 lr: 0.000124 grad: 0.1273 (0.1392) loss: 0.8252 (0.8279) time: 0.1688 data: 0.0701 max mem: 9377 +Train: [9] [1200/6250] eta: 0:14:42 lr: 0.000124 grad: 0.1261 (0.1389) loss: 0.8194 (0.8277) time: 0.1769 data: 0.0816 max mem: 9377 +Train: [9] [1300/6250] eta: 0:14:17 lr: 0.000124 grad: 0.1170 (0.1378) loss: 0.8246 (0.8276) time: 0.1668 data: 0.0730 max mem: 9377 +Train: [9] [1400/6250] eta: 0:13:57 lr: 0.000124 grad: 0.1246 (0.1370) loss: 0.8208 (0.8275) time: 0.1825 data: 0.0934 max mem: 9377 +Train: [9] [1500/6250] eta: 0:13:32 lr: 0.000124 grad: 0.1237 (0.1362) loss: 0.8250 (0.8273) time: 0.1599 data: 0.0698 max mem: 9377 +Train: [9] [1600/6250] eta: 0:13:12 lr: 0.000124 grad: 0.1182 (0.1358) loss: 0.8371 (0.8274) time: 0.1713 data: 0.0906 max mem: 9377 +Train: [9] [1700/6250] eta: 0:12:55 lr: 0.000124 grad: 0.1257 (0.1352) loss: 0.8315 (0.8274) time: 0.1826 data: 0.1024 max mem: 9377 +Train: [9] [1800/6250] eta: 0:12:34 lr: 0.000124 grad: 0.1245 (0.1348) loss: 0.8244 (0.8273) time: 0.1457 data: 0.0532 max mem: 9377 +Train: [9] [1900/6250] eta: 0:12:16 lr: 0.000124 grad: 0.1414 (0.1350) loss: 0.8272 (0.8272) time: 0.1564 data: 0.0678 max mem: 9377 +Train: [9] [2000/6250] eta: 0:12:01 lr: 0.000124 grad: 0.1186 (0.1345) loss: 0.8315 (0.8271) time: 0.1540 data: 0.0670 max mem: 9377 +Train: [9] [2100/6250] eta: 0:11:44 lr: 0.000124 grad: 0.1263 (0.1345) loss: 0.8392 (0.8271) time: 0.1770 data: 0.0948 max mem: 9377 +Train: [9] [2200/6250] eta: 0:11:25 lr: 0.000124 grad: 0.1388 (0.1344) loss: 0.8167 (0.8270) time: 0.1577 data: 0.0706 max mem: 9377 +Train: [9] [2300/6250] eta: 0:11:07 lr: 0.000124 grad: 0.1240 (0.1342) loss: 0.8103 (0.8267) time: 0.1524 data: 0.0653 max mem: 9377 +Train: [9] [2400/6250] eta: 0:10:49 lr: 0.000124 grad: 0.1295 (0.1339) loss: 0.8214 (0.8266) time: 0.1667 data: 0.0810 max mem: 9377 +Train: [9] [2500/6250] eta: 0:10:33 lr: 0.000124 grad: 0.1146 (0.1336) loss: 0.8142 (0.8263) time: 0.1519 data: 0.0701 max mem: 9377 +Train: [9] [2600/6250] eta: 0:10:16 lr: 0.000124 grad: 0.1298 (0.1335) loss: 0.8147 (0.8261) time: 0.0967 data: 0.0051 max mem: 9377 +Train: [9] [2700/6250] eta: 0:10:01 lr: 0.000124 grad: 0.1592 (0.1339) loss: 0.8120 (0.8259) time: 0.1798 data: 0.0958 max mem: 9377 +Train: [9] [2800/6250] eta: 0:09:44 lr: 0.000124 grad: 0.1427 (0.1341) loss: 0.8186 (0.8257) time: 0.1683 data: 0.0818 max mem: 9377 +Train: [9] [2900/6250] eta: 0:09:27 lr: 0.000124 grad: 0.1227 (0.1339) loss: 0.8260 (0.8256) time: 0.1662 data: 0.0820 max mem: 9377 +Train: [9] [3000/6250] eta: 0:09:10 lr: 0.000124 grad: 0.1248 (0.1337) loss: 0.8269 (0.8255) time: 0.1545 data: 0.0672 max mem: 9377 +Train: [9] [3100/6250] eta: 0:08:52 lr: 0.000124 grad: 0.1291 (0.1336) loss: 0.8187 (0.8255) time: 0.1712 data: 0.0881 max mem: 9377 +Train: [9] [3200/6250] eta: 0:08:34 lr: 0.000124 grad: 0.1280 (0.1339) loss: 0.8218 (0.8254) time: 0.1582 data: 0.0710 max mem: 9377 +Train: [9] [3300/6250] eta: 0:08:16 lr: 0.000124 grad: 0.1318 (0.1340) loss: 0.8277 (0.8253) time: 0.1558 data: 0.0645 max mem: 9377 +Train: [9] [3400/6250] eta: 0:07:58 lr: 0.000124 grad: 0.1280 (0.1339) loss: 0.8238 (0.8252) time: 0.1625 data: 0.0734 max mem: 9377 +Train: [9] [3500/6250] eta: 0:07:39 lr: 0.000124 grad: 0.1166 (0.1340) loss: 0.8300 (0.8251) time: 0.1584 data: 0.0689 max mem: 9377 +Train: [9] [3600/6250] eta: 0:07:22 lr: 0.000124 grad: 0.1218 (0.1337) loss: 0.8236 (0.8249) time: 0.1557 data: 0.0648 max mem: 9377 +Train: [9] [3700/6250] eta: 0:07:04 lr: 0.000124 grad: 0.1225 (0.1339) loss: 0.8191 (0.8248) time: 0.1448 data: 0.0522 max mem: 9377 +Train: [9] [3800/6250] eta: 0:06:47 lr: 0.000124 grad: 0.1454 (0.1339) loss: 0.8210 (0.8247) time: 0.1672 data: 0.0743 max mem: 9377 +Train: [9] [3900/6250] eta: 0:06:30 lr: 0.000124 grad: 0.1240 (0.1340) loss: 0.8254 (0.8245) time: 0.1329 data: 0.0509 max mem: 9377 +Train: [9] [4000/6250] eta: 0:06:13 lr: 0.000124 grad: 0.1375 (0.1341) loss: 0.8251 (0.8244) time: 0.1631 data: 0.0689 max mem: 9377 +Train: [9] [4100/6250] eta: 0:05:56 lr: 0.000124 grad: 0.1269 (0.1342) loss: 0.8208 (0.8242) time: 0.1554 data: 0.0621 max mem: 9377 +Train: [9] [4200/6250] eta: 0:05:39 lr: 0.000124 grad: 0.1226 (0.1343) loss: 0.8223 (0.8241) time: 0.1531 data: 0.0636 max mem: 9377 +Train: [9] [4300/6250] eta: 0:05:21 lr: 0.000124 grad: 0.1315 (0.1342) loss: 0.8206 (0.8240) time: 0.1545 data: 0.0646 max mem: 9377 +Train: [9] [4400/6250] eta: 0:05:05 lr: 0.000124 grad: 0.1235 (0.1341) loss: 0.8249 (0.8240) time: 0.1681 data: 0.0788 max mem: 9377 +Train: [9] [4500/6250] eta: 0:04:48 lr: 0.000124 grad: 0.1219 (0.1340) loss: 0.8200 (0.8239) time: 0.1500 data: 0.0624 max mem: 9377 +Train: [9] [4600/6250] eta: 0:04:31 lr: 0.000124 grad: 0.1371 (0.1340) loss: 0.8240 (0.8237) time: 0.1464 data: 0.0611 max mem: 9377 +Train: [9] [4700/6250] eta: 0:04:15 lr: 0.000124 grad: 0.1261 (0.1341) loss: 0.8168 (0.8235) time: 0.1712 data: 0.0942 max mem: 9377 +Train: [9] [4800/6250] eta: 0:03:58 lr: 0.000124 grad: 0.1301 (0.1341) loss: 0.8309 (0.8233) time: 0.1719 data: 0.0888 max mem: 9377 +Train: [9] [4900/6250] eta: 0:03:41 lr: 0.000124 grad: 0.1190 (0.1339) loss: 0.8240 (0.8232) time: 0.1561 data: 0.0714 max mem: 9377 +Train: [9] [5000/6250] eta: 0:03:24 lr: 0.000124 grad: 0.1277 (0.1338) loss: 0.8185 (0.8231) time: 0.1574 data: 0.0684 max mem: 9377 +Train: [9] [5100/6250] eta: 0:03:08 lr: 0.000124 grad: 0.1385 (0.1338) loss: 0.8128 (0.8231) time: 0.1590 data: 0.0709 max mem: 9377 +Train: [9] [5200/6250] eta: 0:02:51 lr: 0.000124 grad: 0.1140 (0.1338) loss: 0.8157 (0.8229) time: 0.1521 data: 0.0583 max mem: 9377 +Train: [9] [5300/6250] eta: 0:02:35 lr: 0.000124 grad: 0.1322 (0.1338) loss: 0.8157 (0.8227) time: 0.1458 data: 0.0610 max mem: 9377 +Train: [9] [5400/6250] eta: 0:02:19 lr: 0.000124 grad: 0.1202 (0.1339) loss: 0.8243 (0.8226) time: 0.1669 data: 0.0859 max mem: 9377 +Train: [9] [5500/6250] eta: 0:02:02 lr: 0.000124 grad: 0.1387 (0.1341) loss: 0.8224 (0.8225) time: 0.1795 data: 0.0948 max mem: 9377 +Train: [9] [5600/6250] eta: 0:01:46 lr: 0.000124 grad: 0.1299 (0.1340) loss: 0.8071 (0.8224) time: 0.1442 data: 0.0592 max mem: 9377 +Train: [9] [5700/6250] eta: 0:01:29 lr: 0.000124 grad: 0.1340 (0.1341) loss: 0.8154 (0.8222) time: 0.1423 data: 0.0518 max mem: 9377 +Train: [9] [5800/6250] eta: 0:01:13 lr: 0.000124 grad: 0.1398 (0.1340) loss: 0.8034 (0.8220) time: 0.1456 data: 0.0563 max mem: 9377 +Train: [9] [5900/6250] eta: 0:00:57 lr: 0.000124 grad: 0.1284 (0.1340) loss: 0.8216 (0.8219) time: 0.1457 data: 0.0541 max mem: 9377 +Train: [9] [6000/6250] eta: 0:00:40 lr: 0.000124 grad: 0.1301 (0.1340) loss: 0.8212 (0.8218) time: 0.1310 data: 0.0434 max mem: 9377 +Train: [9] [6100/6250] eta: 0:00:24 lr: 0.000124 grad: 0.1340 (0.1339) loss: 0.8196 (0.8218) time: 0.1542 data: 0.0628 max mem: 9377 +Train: [9] [6200/6250] eta: 0:00:08 lr: 0.000124 grad: 0.1387 (0.1339) loss: 0.8119 (0.8216) time: 0.1545 data: 0.0689 max mem: 9377 +Train: [9] [6249/6250] eta: 0:00:00 lr: 0.000124 grad: 0.1336 (0.1339) loss: 0.8206 (0.8216) time: 0.2073 data: 0.1264 max mem: 9377 +Train: [9] Total time: 0:17:04 (0.1640 s / it) +Averaged stats: lr: 0.000124 grad: 0.1336 (0.1339) loss: 0.8206 (0.8216) +Eval (hcp-train-subset): [9] [ 0/62] eta: 0:05:36 loss: 0.8630 (0.8630) time: 5.4246 data: 5.3935 max mem: 9377 +Eval (hcp-train-subset): [9] [61/62] eta: 0:00:00 loss: 0.8672 (0.8693) time: 0.1666 data: 0.1393 max mem: 9377 +Eval (hcp-train-subset): [9] Total time: 0:00:14 (0.2389 s / it) +Averaged stats (hcp-train-subset): loss: 0.8672 (0.8693) +Making plots (hcp-train-subset): example=43 +Eval (hcp-val): [9] [ 0/62] eta: 0:05:25 loss: 0.8648 (0.8648) time: 5.2568 data: 5.2255 max mem: 9377 +Eval (hcp-val): [9] [61/62] eta: 0:00:00 loss: 0.8689 (0.8677) time: 0.1368 data: 0.1117 max mem: 9377 +Eval (hcp-val): [9] Total time: 0:00:14 (0.2352 s / it) +Averaged stats (hcp-val): loss: 0.8689 (0.8677) +Making plots (hcp-val): example=26 +Eval (nsd-val): [9] [ 0/62] eta: 0:06:02 loss: 0.8280 (0.8280) time: 5.8466 data: 5.8152 max mem: 9377 +Eval (nsd-val): [9] [61/62] eta: 0:00:00 loss: 0.8387 (0.8414) time: 0.1129 data: 0.0881 max mem: 9377 +Eval (nsd-val): [9] Total time: 0:00:14 (0.2327 s / it) +Averaged stats (nsd-val): loss: 0.8387 (0.8414) +Making plots (nsd-val): example=36 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-best.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-00009.pth +Train: [10] [ 0/6250] eta: 8:12:08 lr: 0.000124 grad: 0.4345 (0.4345) loss: 0.8099 (0.8099) time: 4.7246 data: 4.4497 max mem: 9377 +Train: [10] [ 100/6250] eta: 0:24:14 lr: 0.000124 grad: 0.1393 (0.1998) loss: 0.8420 (0.8273) time: 0.1802 data: 0.0832 max mem: 9377 +Train: [10] [ 200/6250] eta: 0:20:56 lr: 0.000124 grad: 0.1340 (0.1769) loss: 0.8352 (0.8236) time: 0.1733 data: 0.0914 max mem: 9377 +Train: [10] [ 300/6250] eta: 0:20:06 lr: 0.000124 grad: 0.1558 (0.1681) loss: 0.8148 (0.8202) time: 0.1855 data: 0.0883 max mem: 9377 +Train: [10] [ 400/6250] eta: 0:18:57 lr: 0.000124 grad: 0.1270 (0.1617) loss: 0.8089 (0.8185) time: 0.1879 data: 0.0971 max mem: 9377 +Train: [10] [ 500/6250] eta: 0:18:28 lr: 0.000124 grad: 0.1309 (0.1565) loss: 0.8070 (0.8168) time: 0.1873 data: 0.1014 max mem: 9377 +Train: [10] [ 600/6250] eta: 0:17:39 lr: 0.000124 grad: 0.1188 (0.1523) loss: 0.8195 (0.8157) time: 0.1720 data: 0.0831 max mem: 9377 +Train: [10] [ 700/6250] eta: 0:17:01 lr: 0.000124 grad: 0.1355 (0.1488) loss: 0.8186 (0.8154) time: 0.1775 data: 0.0919 max mem: 9377 +Train: [10] [ 800/6250] eta: 0:16:32 lr: 0.000124 grad: 0.1394 (0.1466) loss: 0.8073 (0.8146) time: 0.1733 data: 0.0777 max mem: 9377 +Train: [10] [ 900/6250] eta: 0:16:08 lr: 0.000124 grad: 0.1388 (0.1458) loss: 0.8029 (0.8137) time: 0.1702 data: 0.0701 max mem: 9377 +Train: [10] [1000/6250] eta: 0:15:43 lr: 0.000124 grad: 0.1439 (0.1444) loss: 0.8087 (0.8133) time: 0.1827 data: 0.0690 max mem: 9377 +Train: [10] [1100/6250] eta: 0:15:18 lr: 0.000124 grad: 0.1252 (0.1432) loss: 0.8062 (0.8124) time: 0.1555 data: 0.0494 max mem: 9377 +Train: [10] [1200/6250] eta: 0:14:56 lr: 0.000124 grad: 0.1256 (0.1423) loss: 0.8031 (0.8119) time: 0.1584 data: 0.0580 max mem: 9377 +Train: [10] [1300/6250] eta: 0:14:29 lr: 0.000124 grad: 0.1242 (0.1416) loss: 0.8120 (0.8111) time: 0.1502 data: 0.0572 max mem: 9377 +Train: [10] [1400/6250] eta: 0:14:04 lr: 0.000124 grad: 0.1264 (0.1411) loss: 0.7986 (0.8107) time: 0.1414 data: 0.0490 max mem: 9377 +Train: [10] [1500/6250] eta: 0:13:41 lr: 0.000124 grad: 0.1265 (0.1402) loss: 0.8044 (0.8104) time: 0.1519 data: 0.0707 max mem: 9377 +Train: [10] [1600/6250] eta: 0:13:21 lr: 0.000124 grad: 0.1263 (0.1399) loss: 0.8042 (0.8101) time: 0.1734 data: 0.0876 max mem: 9377 +Train: [10] [1700/6250] eta: 0:13:05 lr: 0.000124 grad: 0.1448 (0.1401) loss: 0.7898 (0.8099) time: 0.2012 data: 0.1199 max mem: 9377 +Train: [10] [1800/6250] eta: 0:12:49 lr: 0.000124 grad: 0.1387 (0.1405) loss: 0.8066 (0.8095) time: 0.1743 data: 0.0984 max mem: 9377 +Train: [10] [1900/6250] eta: 0:12:31 lr: 0.000124 grad: 0.1353 (0.1404) loss: 0.7998 (0.8091) time: 0.1690 data: 0.0919 max mem: 9377 +Train: [10] [2000/6250] eta: 0:12:13 lr: 0.000124 grad: 0.1430 (0.1403) loss: 0.7968 (0.8088) time: 0.1305 data: 0.0442 max mem: 9377 +Train: [10] [2100/6250] eta: 0:11:53 lr: 0.000124 grad: 0.1501 (0.1403) loss: 0.7990 (0.8084) time: 0.1365 data: 0.0623 max mem: 9377 +Train: [10] [2200/6250] eta: 0:11:35 lr: 0.000124 grad: 0.1247 (0.1405) loss: 0.8019 (0.8079) time: 0.1357 data: 0.0543 max mem: 9377 +Train: [10] [2300/6250] eta: 0:11:17 lr: 0.000124 grad: 0.1349 (0.1403) loss: 0.7961 (0.8077) time: 0.1429 data: 0.0522 max mem: 9377 +Train: [10] [2400/6250] eta: 0:10:58 lr: 0.000124 grad: 0.1354 (0.1404) loss: 0.7988 (0.8074) time: 0.1581 data: 0.0784 max mem: 9377 +Train: [10] [2500/6250] eta: 0:10:39 lr: 0.000124 grad: 0.1286 (0.1401) loss: 0.8023 (0.8072) time: 0.1507 data: 0.0648 max mem: 9377 +Train: [10] [2600/6250] eta: 0:10:19 lr: 0.000124 grad: 0.1253 (0.1398) loss: 0.8076 (0.8072) time: 0.1650 data: 0.0791 max mem: 9377 +Train: [10] [2700/6250] eta: 0:10:00 lr: 0.000124 grad: 0.1282 (0.1395) loss: 0.8122 (0.8073) time: 0.1569 data: 0.0643 max mem: 9377 +Train: [10] [2800/6250] eta: 0:09:42 lr: 0.000124 grad: 0.1236 (0.1393) loss: 0.8082 (0.8072) time: 0.1593 data: 0.0702 max mem: 9377 +Train: [10] [2900/6250] eta: 0:09:25 lr: 0.000124 grad: 0.1317 (0.1394) loss: 0.8068 (0.8072) time: 0.1786 data: 0.0845 max mem: 9377 +Train: [10] [3000/6250] eta: 0:09:07 lr: 0.000124 grad: 0.1358 (0.1395) loss: 0.8047 (0.8071) time: 0.1465 data: 0.0595 max mem: 9377 +Train: [10] [3100/6250] eta: 0:08:50 lr: 0.000124 grad: 0.1235 (0.1395) loss: 0.8097 (0.8072) time: 0.1388 data: 0.0490 max mem: 9377 +Train: [10] [3200/6250] eta: 0:08:32 lr: 0.000124 grad: 0.1349 (0.1396) loss: 0.8031 (0.8071) time: 0.1572 data: 0.0691 max mem: 9377 +Train: [10] [3300/6250] eta: 0:08:15 lr: 0.000124 grad: 0.1367 (0.1396) loss: 0.8120 (0.8072) time: 0.1704 data: 0.0833 max mem: 9377 +Train: [10] [3400/6250] eta: 0:07:57 lr: 0.000124 grad: 0.1310 (0.1395) loss: 0.8050 (0.8072) time: 0.1789 data: 0.1002 max mem: 9377 +Train: [10] [3500/6250] eta: 0:07:39 lr: 0.000124 grad: 0.1357 (0.1395) loss: 0.8073 (0.8072) time: 0.1546 data: 0.0697 max mem: 9377 +Train: [10] [3600/6250] eta: 0:07:21 lr: 0.000124 grad: 0.1395 (0.1393) loss: 0.8037 (0.8073) time: 0.1397 data: 0.0463 max mem: 9377 +Train: [10] [3700/6250] eta: 0:07:04 lr: 0.000124 grad: 0.1331 (0.1392) loss: 0.8047 (0.8072) time: 0.1483 data: 0.0584 max mem: 9377 +Train: [10] [3800/6250] eta: 0:06:46 lr: 0.000124 grad: 0.1301 (0.1393) loss: 0.8170 (0.8071) time: 0.1254 data: 0.0374 max mem: 9377 +Train: [10] [3900/6250] eta: 0:06:29 lr: 0.000124 grad: 0.1287 (0.1393) loss: 0.8117 (0.8071) time: 0.1450 data: 0.0672 max mem: 9377 +Train: [10] [4000/6250] eta: 0:06:12 lr: 0.000124 grad: 0.1278 (0.1394) loss: 0.8043 (0.8071) time: 0.1332 data: 0.0461 max mem: 9377 +Train: [10] [4100/6250] eta: 0:05:55 lr: 0.000124 grad: 0.1287 (0.1395) loss: 0.8130 (0.8071) time: 0.1642 data: 0.0829 max mem: 9377 +Train: [10] [4200/6250] eta: 0:05:38 lr: 0.000124 grad: 0.1300 (0.1395) loss: 0.8180 (0.8070) time: 0.1483 data: 0.0567 max mem: 9377 +Train: [10] [4300/6250] eta: 0:05:22 lr: 0.000124 grad: 0.1392 (0.1395) loss: 0.7919 (0.8069) time: 0.1867 data: 0.1003 max mem: 9377 +Train: [10] [4400/6250] eta: 0:05:04 lr: 0.000124 grad: 0.1317 (0.1395) loss: 0.8042 (0.8067) time: 0.1512 data: 0.0567 max mem: 9377 +Train: [10] [4500/6250] eta: 0:04:47 lr: 0.000124 grad: 0.1405 (0.1395) loss: 0.7858 (0.8064) time: 0.1613 data: 0.0762 max mem: 9377 +Train: [10] [4600/6250] eta: 0:04:30 lr: 0.000124 grad: 0.1288 (0.1397) loss: 0.7950 (0.8062) time: 0.1560 data: 0.0677 max mem: 9377 +Train: [10] [4700/6250] eta: 0:04:14 lr: 0.000124 grad: 0.1428 (0.1398) loss: 0.7967 (0.8059) time: 0.1250 data: 0.0363 max mem: 9377 +Train: [10] [4800/6250] eta: 0:03:57 lr: 0.000124 grad: 0.1432 (0.1399) loss: 0.7901 (0.8055) time: 0.1657 data: 0.0748 max mem: 9377 +Train: [10] [4900/6250] eta: 0:03:41 lr: 0.000124 grad: 0.1303 (0.1399) loss: 0.7966 (0.8053) time: 0.1653 data: 0.0841 max mem: 9377 +Train: [10] [5000/6250] eta: 0:03:24 lr: 0.000124 grad: 0.1372 (0.1400) loss: 0.7880 (0.8049) time: 0.1541 data: 0.0626 max mem: 9377 +Train: [10] [5100/6250] eta: 0:03:07 lr: 0.000124 grad: 0.1356 (0.1402) loss: 0.7783 (0.8046) time: 0.1666 data: 0.0833 max mem: 9377 +Train: [10] [5200/6250] eta: 0:02:51 lr: 0.000124 grad: 0.1313 (0.1402) loss: 0.7841 (0.8043) time: 0.1505 data: 0.0625 max mem: 9377 +Train: [10] [5300/6250] eta: 0:02:34 lr: 0.000124 grad: 0.1370 (0.1402) loss: 0.7912 (0.8041) time: 0.1474 data: 0.0536 max mem: 9377 +Train: [10] [5400/6250] eta: 0:02:18 lr: 0.000124 grad: 0.1349 (0.1404) loss: 0.7914 (0.8039) time: 0.1593 data: 0.0735 max mem: 9377 +Train: [10] [5500/6250] eta: 0:02:02 lr: 0.000124 grad: 0.1318 (0.1405) loss: 0.7824 (0.8036) time: 0.1850 data: 0.0964 max mem: 9377 +Train: [10] [5600/6250] eta: 0:01:45 lr: 0.000124 grad: 0.1338 (0.1405) loss: 0.7931 (0.8034) time: 0.1545 data: 0.0680 max mem: 9377 +Train: [10] [5700/6250] eta: 0:01:29 lr: 0.000124 grad: 0.1327 (0.1405) loss: 0.8015 (0.8033) time: 0.1477 data: 0.0628 max mem: 9377 +Train: [10] [5800/6250] eta: 0:01:13 lr: 0.000124 grad: 0.1551 (0.1406) loss: 0.7990 (0.8031) time: 0.1560 data: 0.0762 max mem: 9377 +Train: [10] [5900/6250] eta: 0:00:56 lr: 0.000124 grad: 0.1398 (0.1406) loss: 0.7888 (0.8029) time: 0.1597 data: 0.0712 max mem: 9377 +Train: [10] [6000/6250] eta: 0:00:40 lr: 0.000124 grad: 0.1382 (0.1406) loss: 0.7864 (0.8028) time: 0.1506 data: 0.0589 max mem: 9377 +Train: [10] [6100/6250] eta: 0:00:24 lr: 0.000124 grad: 0.1415 (0.1406) loss: 0.7970 (0.8027) time: 0.1788 data: 0.0910 max mem: 9377 +Train: [10] [6200/6250] eta: 0:00:08 lr: 0.000124 grad: 0.1391 (0.1405) loss: 0.7964 (0.8025) time: 0.1811 data: 0.0977 max mem: 9377 +Train: [10] [6249/6250] eta: 0:00:00 lr: 0.000124 grad: 0.1292 (0.1405) loss: 0.7893 (0.8024) time: 0.1796 data: 0.0888 max mem: 9377 +Train: [10] Total time: 0:16:59 (0.1631 s / it) +Averaged stats: lr: 0.000124 grad: 0.1292 (0.1405) loss: 0.7893 (0.8024) +Eval (hcp-train-subset): [10] [ 0/62] eta: 0:04:59 loss: 0.8639 (0.8639) time: 4.8273 data: 4.7950 max mem: 9377 +Eval (hcp-train-subset): [10] [61/62] eta: 0:00:00 loss: 0.8694 (0.8690) time: 0.1216 data: 0.0965 max mem: 9377 +Eval (hcp-train-subset): [10] Total time: 0:00:14 (0.2345 s / it) +Averaged stats (hcp-train-subset): loss: 0.8694 (0.8690) +Eval (hcp-val): [10] [ 0/62] eta: 0:05:02 loss: 0.8614 (0.8614) time: 4.8871 data: 4.8567 max mem: 9377 +Eval (hcp-val): [10] [61/62] eta: 0:00:00 loss: 0.8669 (0.8679) time: 0.1449 data: 0.1200 max mem: 9377 +Eval (hcp-val): [10] Total time: 0:00:14 (0.2314 s / it) +Averaged stats (hcp-val): loss: 0.8669 (0.8679) +Eval (nsd-val): [10] [ 0/62] eta: 0:04:29 loss: 0.8333 (0.8333) time: 4.3431 data: 4.3063 max mem: 9377 +Eval (nsd-val): [10] [61/62] eta: 0:00:00 loss: 0.8412 (0.8442) time: 0.1419 data: 0.1169 max mem: 9377 +Eval (nsd-val): [10] Total time: 0:00:13 (0.2121 s / it) +Averaged stats (nsd-val): loss: 0.8412 (0.8442) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [11] [ 0/6250] eta: 10:54:08 lr: 0.000124 grad: 0.1070 (0.1070) loss: 0.8560 (0.8560) time: 6.2797 data: 6.1549 max mem: 9377 +Train: [11] [ 100/6250] eta: 0:22:43 lr: 0.000124 grad: 0.1330 (0.1782) loss: 0.8160 (0.8322) time: 0.1743 data: 0.0891 max mem: 9377 +Train: [11] [ 200/6250] eta: 0:19:51 lr: 0.000124 grad: 0.1470 (0.1719) loss: 0.8036 (0.8172) time: 0.1858 data: 0.0930 max mem: 9377 +Train: [11] [ 300/6250] eta: 0:18:46 lr: 0.000124 grad: 0.1340 (0.1625) loss: 0.8108 (0.8133) time: 0.1671 data: 0.0736 max mem: 9377 +Train: [11] [ 400/6250] eta: 0:18:00 lr: 0.000124 grad: 0.1387 (0.1592) loss: 0.8037 (0.8107) time: 0.1732 data: 0.0940 max mem: 9377 +Train: [11] [ 500/6250] eta: 0:17:27 lr: 0.000124 grad: 0.1315 (0.1573) loss: 0.8097 (0.8083) time: 0.1573 data: 0.0659 max mem: 9377 +Train: [11] [ 600/6250] eta: 0:16:54 lr: 0.000124 grad: 0.1353 (0.1534) loss: 0.7966 (0.8070) time: 0.1631 data: 0.0622 max mem: 9377 +Train: [11] [ 700/6250] eta: 0:16:29 lr: 0.000124 grad: 0.1356 (0.1508) loss: 0.7925 (0.8060) time: 0.1613 data: 0.0784 max mem: 9377 +Train: [11] [ 800/6250] eta: 0:16:06 lr: 0.000124 grad: 0.1344 (0.1486) loss: 0.8068 (0.8054) time: 0.1666 data: 0.0787 max mem: 9377 +Train: [11] [ 900/6250] eta: 0:15:45 lr: 0.000124 grad: 0.1372 (0.1475) loss: 0.7901 (0.8042) time: 0.1748 data: 0.0865 max mem: 9377 +Train: [11] [1000/6250] eta: 0:15:10 lr: 0.000124 grad: 0.1215 (0.1462) loss: 0.8145 (0.8032) time: 0.1545 data: 0.0595 max mem: 9377 +Train: [11] [1100/6250] eta: 0:14:42 lr: 0.000124 grad: 0.1394 (0.1456) loss: 0.7942 (0.8024) time: 0.1547 data: 0.0651 max mem: 9377 +Train: [11] [1200/6250] eta: 0:14:15 lr: 0.000124 grad: 0.1293 (0.1445) loss: 0.8004 (0.8019) time: 0.1188 data: 0.0252 max mem: 9377 +Train: [11] [1300/6250] eta: 0:13:51 lr: 0.000124 grad: 0.1274 (0.1435) loss: 0.8045 (0.8017) time: 0.1517 data: 0.0528 max mem: 9377 +Train: [11] [1400/6250] eta: 0:13:26 lr: 0.000124 grad: 0.1291 (0.1428) loss: 0.7984 (0.8014) time: 0.1559 data: 0.0658 max mem: 9377 +Train: [11] [1500/6250] eta: 0:13:07 lr: 0.000124 grad: 0.1232 (0.1423) loss: 0.7886 (0.8008) time: 0.1897 data: 0.1071 max mem: 9377 +Train: [11] [1600/6250] eta: 0:12:47 lr: 0.000124 grad: 0.1418 (0.1417) loss: 0.7896 (0.8006) time: 0.1526 data: 0.0664 max mem: 9377 +Train: [11] [1700/6250] eta: 0:12:33 lr: 0.000124 grad: 0.1289 (0.1417) loss: 0.7815 (0.8000) time: 0.1479 data: 0.0631 max mem: 9377 +Train: [11] [1800/6250] eta: 0:12:15 lr: 0.000124 grad: 0.1319 (0.1412) loss: 0.7908 (0.7995) time: 0.1456 data: 0.0675 max mem: 9377 +Train: [11] [1900/6250] eta: 0:11:56 lr: 0.000124 grad: 0.1360 (0.1409) loss: 0.7851 (0.7990) time: 0.1610 data: 0.0738 max mem: 9377 +Train: [11] [2000/6250] eta: 0:11:38 lr: 0.000124 grad: 0.1410 (0.1410) loss: 0.7768 (0.7981) time: 0.1626 data: 0.0727 max mem: 9377 +Train: [11] [2100/6250] eta: 0:11:20 lr: 0.000124 grad: 0.1399 (0.1412) loss: 0.7827 (0.7972) time: 0.1672 data: 0.0806 max mem: 9377 +Train: [11] [2200/6250] eta: 0:11:04 lr: 0.000124 grad: 0.1339 (0.1413) loss: 0.7877 (0.7963) time: 0.2283 data: 0.1407 max mem: 9377 +Train: [11] [2300/6250] eta: 0:10:46 lr: 0.000124 grad: 0.1570 (0.1416) loss: 0.7822 (0.7956) time: 0.1600 data: 0.0759 max mem: 9377 +Train: [11] [2400/6250] eta: 0:10:29 lr: 0.000124 grad: 0.1250 (0.1415) loss: 0.7998 (0.7952) time: 0.1493 data: 0.0555 max mem: 9377 +Train: [11] [2500/6250] eta: 0:10:13 lr: 0.000124 grad: 0.1377 (0.1416) loss: 0.7871 (0.7946) time: 0.1375 data: 0.0445 max mem: 9377 +Train: [11] [2600/6250] eta: 0:09:56 lr: 0.000124 grad: 0.1431 (0.1417) loss: 0.7730 (0.7941) time: 0.1745 data: 0.0933 max mem: 9377 +Train: [11] [2700/6250] eta: 0:09:40 lr: 0.000124 grad: 0.1331 (0.1417) loss: 0.7810 (0.7936) time: 0.1663 data: 0.0793 max mem: 9377 +Train: [11] [2800/6250] eta: 0:09:23 lr: 0.000124 grad: 0.1344 (0.1416) loss: 0.7925 (0.7933) time: 0.1495 data: 0.0641 max mem: 9377 +Train: [11] [2900/6250] eta: 0:09:07 lr: 0.000124 grad: 0.1370 (0.1416) loss: 0.7799 (0.7929) time: 0.1455 data: 0.0654 max mem: 9377 +Train: [11] [3000/6250] eta: 0:08:49 lr: 0.000124 grad: 0.1384 (0.1414) loss: 0.7718 (0.7925) time: 0.1540 data: 0.0691 max mem: 9377 +Train: [11] [3100/6250] eta: 0:08:33 lr: 0.000124 grad: 0.1358 (0.1415) loss: 0.7904 (0.7922) time: 0.1328 data: 0.0549 max mem: 9377 +Train: [11] [3200/6250] eta: 0:08:15 lr: 0.000124 grad: 0.1504 (0.1416) loss: 0.7843 (0.7919) time: 0.1326 data: 0.0526 max mem: 9377 +Train: [11] [3300/6250] eta: 0:07:59 lr: 0.000124 grad: 0.1363 (0.1417) loss: 0.7778 (0.7916) time: 0.1278 data: 0.0423 max mem: 9377 +Train: [11] [3400/6250] eta: 0:07:42 lr: 0.000124 grad: 0.1381 (0.1418) loss: 0.7819 (0.7914) time: 0.1567 data: 0.0722 max mem: 9377 +Train: [11] [3500/6250] eta: 0:07:25 lr: 0.000124 grad: 0.1333 (0.1418) loss: 0.7904 (0.7912) time: 0.1796 data: 0.0971 max mem: 9377 +Train: [11] [3600/6250] eta: 0:07:09 lr: 0.000124 grad: 0.1340 (0.1418) loss: 0.7861 (0.7908) time: 0.1900 data: 0.1035 max mem: 9377 +Train: [11] [3700/6250] eta: 0:06:52 lr: 0.000124 grad: 0.1488 (0.1417) loss: 0.7820 (0.7907) time: 0.1628 data: 0.0835 max mem: 9377 +Train: [11] [3800/6250] eta: 0:06:35 lr: 0.000124 grad: 0.1303 (0.1418) loss: 0.7799 (0.7905) time: 0.1225 data: 0.0246 max mem: 9377 +Train: [11] [3900/6250] eta: 0:06:19 lr: 0.000124 grad: 0.1382 (0.1417) loss: 0.7713 (0.7903) time: 0.1614 data: 0.0670 max mem: 9377 +Train: [11] [4000/6250] eta: 0:06:02 lr: 0.000123 grad: 0.1349 (0.1416) loss: 0.7964 (0.7901) time: 0.1465 data: 0.0653 max mem: 9377 +Train: [11] [4100/6250] eta: 0:05:45 lr: 0.000123 grad: 0.1358 (0.1417) loss: 0.7723 (0.7900) time: 0.1637 data: 0.0834 max mem: 9377 +Train: [11] [4200/6250] eta: 0:05:29 lr: 0.000123 grad: 0.1257 (0.1416) loss: 0.7922 (0.7899) time: 0.1495 data: 0.0654 max mem: 9377 +Train: [11] [4300/6250] eta: 0:05:12 lr: 0.000123 grad: 0.1338 (0.1417) loss: 0.7846 (0.7897) time: 0.1803 data: 0.0957 max mem: 9377 +Train: [11] [4400/6250] eta: 0:04:56 lr: 0.000123 grad: 0.1277 (0.1417) loss: 0.7880 (0.7898) time: 0.1534 data: 0.0698 max mem: 9377 +Train: [11] [4500/6250] eta: 0:04:40 lr: 0.000123 grad: 0.1351 (0.1416) loss: 0.7867 (0.7898) time: 0.1812 data: 0.0904 max mem: 9377 +Train: [11] [4600/6250] eta: 0:04:23 lr: 0.000123 grad: 0.1406 (0.1417) loss: 0.7866 (0.7898) time: 0.1386 data: 0.0475 max mem: 9377 +Train: [11] [4700/6250] eta: 0:04:07 lr: 0.000123 grad: 0.1279 (0.1416) loss: 0.7872 (0.7898) time: 0.1605 data: 0.0687 max mem: 9377 +Train: [11] [4800/6250] eta: 0:03:51 lr: 0.000123 grad: 0.1454 (0.1416) loss: 0.7856 (0.7897) time: 0.1276 data: 0.0359 max mem: 9377 +Train: [11] [4900/6250] eta: 0:03:35 lr: 0.000123 grad: 0.1344 (0.1415) loss: 0.7924 (0.7898) time: 0.1716 data: 0.0843 max mem: 9377 +Train: [11] [5000/6250] eta: 0:03:19 lr: 0.000123 grad: 0.1352 (0.1414) loss: 0.7967 (0.7898) time: 0.1706 data: 0.0836 max mem: 9377 +Train: [11] [5100/6250] eta: 0:03:03 lr: 0.000123 grad: 0.1274 (0.1414) loss: 0.7912 (0.7898) time: 0.1553 data: 0.0703 max mem: 9377 +Train: [11] [5200/6250] eta: 0:02:47 lr: 0.000123 grad: 0.1346 (0.1414) loss: 0.7909 (0.7897) time: 0.1738 data: 0.0890 max mem: 9377 +Train: [11] [5300/6250] eta: 0:02:31 lr: 0.000123 grad: 0.1443 (0.1414) loss: 0.7738 (0.7897) time: 0.1803 data: 0.0903 max mem: 9377 +Train: [11] [5400/6250] eta: 0:02:15 lr: 0.000123 grad: 0.1417 (0.1416) loss: 0.7886 (0.7895) time: 0.1632 data: 0.0819 max mem: 9377 +Train: [11] [5500/6250] eta: 0:01:59 lr: 0.000123 grad: 0.1318 (0.1416) loss: 0.7855 (0.7893) time: 0.1692 data: 0.0813 max mem: 9377 +Train: [11] [5600/6250] eta: 0:01:43 lr: 0.000123 grad: 0.1527 (0.1417) loss: 0.7644 (0.7890) time: 0.1714 data: 0.0788 max mem: 9377 +Train: [11] [5700/6250] eta: 0:01:27 lr: 0.000123 grad: 0.1350 (0.1417) loss: 0.7775 (0.7888) time: 0.1402 data: 0.0513 max mem: 9377 +Train: [11] [5800/6250] eta: 0:01:11 lr: 0.000123 grad: 0.1391 (0.1419) loss: 0.7767 (0.7886) time: 0.1306 data: 0.0413 max mem: 9377 +Train: [11] [5900/6250] eta: 0:00:55 lr: 0.000123 grad: 0.1511 (0.1419) loss: 0.7764 (0.7884) time: 0.1648 data: 0.0839 max mem: 9377 +Train: [11] [6000/6250] eta: 0:00:39 lr: 0.000123 grad: 0.1414 (0.1420) loss: 0.7815 (0.7883) time: 0.1367 data: 0.0525 max mem: 9377 +Train: [11] [6100/6250] eta: 0:00:23 lr: 0.000123 grad: 0.1311 (0.1420) loss: 0.7803 (0.7882) time: 0.1647 data: 0.0827 max mem: 9377 +Train: [11] [6200/6250] eta: 0:00:07 lr: 0.000123 grad: 0.1317 (0.1420) loss: 0.7831 (0.7882) time: 0.1482 data: 0.0522 max mem: 9377 +Train: [11] [6249/6250] eta: 0:00:00 lr: 0.000123 grad: 0.1251 (0.1420) loss: 0.7980 (0.7882) time: 0.1798 data: 0.0962 max mem: 9377 +Train: [11] Total time: 0:16:39 (0.1599 s / it) +Averaged stats: lr: 0.000123 grad: 0.1251 (0.1420) loss: 0.7980 (0.7882) +Eval (hcp-train-subset): [11] [ 0/62] eta: 0:06:05 loss: 0.8627 (0.8627) time: 5.8963 data: 5.8651 max mem: 9377 +Eval (hcp-train-subset): [11] [61/62] eta: 0:00:00 loss: 0.8663 (0.8676) time: 0.1464 data: 0.1195 max mem: 9377 +Eval (hcp-train-subset): [11] Total time: 0:00:14 (0.2388 s / it) +Averaged stats (hcp-train-subset): loss: 0.8663 (0.8676) +Eval (hcp-val): [11] [ 0/62] eta: 0:05:11 loss: 0.8649 (0.8649) time: 5.0175 data: 4.9873 max mem: 9377 +Eval (hcp-val): [11] [61/62] eta: 0:00:00 loss: 0.8659 (0.8661) time: 0.1214 data: 0.0964 max mem: 9377 +Eval (hcp-val): [11] Total time: 0:00:14 (0.2308 s / it) +Averaged stats (hcp-val): loss: 0.8659 (0.8661) +Eval (nsd-val): [11] [ 0/62] eta: 0:04:55 loss: 0.8418 (0.8418) time: 4.7637 data: 4.7323 max mem: 9377 +Eval (nsd-val): [11] [61/62] eta: 0:00:00 loss: 0.8447 (0.8473) time: 0.1242 data: 0.0991 max mem: 9377 +Eval (nsd-val): [11] Total time: 0:00:14 (0.2369 s / it) +Averaged stats (nsd-val): loss: 0.8447 (0.8473) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-best.pth +Train: [12] [ 0/6250] eta: 11:08:06 lr: 0.000123 grad: 0.0851 (0.0851) loss: 0.8658 (0.8658) time: 6.4139 data: 6.2678 max mem: 9377 +Train: [12] [ 100/6250] eta: 0:23:01 lr: 0.000123 grad: 0.1520 (0.1695) loss: 0.8244 (0.8340) time: 0.1998 data: 0.1121 max mem: 9377 +Train: [12] [ 200/6250] eta: 0:19:50 lr: 0.000123 grad: 0.1504 (0.1720) loss: 0.8057 (0.8182) time: 0.1574 data: 0.0614 max mem: 9377 +Train: [12] [ 300/6250] eta: 0:18:32 lr: 0.000123 grad: 0.1472 (0.1709) loss: 0.7621 (0.8052) time: 0.1515 data: 0.0566 max mem: 9377 +Train: [12] [ 400/6250] eta: 0:17:47 lr: 0.000123 grad: 0.1427 (0.1645) loss: 0.7830 (0.7994) time: 0.1733 data: 0.0860 max mem: 9377 +Train: [12] [ 500/6250] eta: 0:17:04 lr: 0.000123 grad: 0.1449 (0.1605) loss: 0.7777 (0.7965) time: 0.1528 data: 0.0667 max mem: 9377 +Train: [12] [ 600/6250] eta: 0:16:23 lr: 0.000123 grad: 0.1348 (0.1573) loss: 0.7988 (0.7953) time: 0.1430 data: 0.0592 max mem: 9377 +Train: [12] [ 700/6250] eta: 0:15:50 lr: 0.000123 grad: 0.1251 (0.1542) loss: 0.8135 (0.7948) time: 0.1138 data: 0.0325 max mem: 9377 +Train: [12] [ 800/6250] eta: 0:15:24 lr: 0.000123 grad: 0.1286 (0.1513) loss: 0.7968 (0.7944) time: 0.1532 data: 0.0670 max mem: 9377 +Train: [12] [ 900/6250] eta: 0:15:01 lr: 0.000123 grad: 0.1263 (0.1496) loss: 0.7908 (0.7935) time: 0.1609 data: 0.0685 max mem: 9377 +Train: [12] [1000/6250] eta: 0:14:28 lr: 0.000123 grad: 0.1368 (0.1479) loss: 0.7753 (0.7927) time: 0.1345 data: 0.0530 max mem: 9377 +Train: [12] [1100/6250] eta: 0:14:04 lr: 0.000123 grad: 0.1241 (0.1466) loss: 0.7898 (0.7918) time: 0.1511 data: 0.0448 max mem: 9377 +Train: [12] [1200/6250] eta: 0:13:41 lr: 0.000123 grad: 0.1292 (0.1455) loss: 0.7825 (0.7914) time: 0.1301 data: 0.0319 max mem: 9377 +Train: [12] [1300/6250] eta: 0:13:19 lr: 0.000123 grad: 0.1286 (0.1450) loss: 0.7820 (0.7907) time: 0.1540 data: 0.0489 max mem: 9377 +Train: [12] [1400/6250] eta: 0:12:58 lr: 0.000123 grad: 0.1155 (0.1441) loss: 0.7857 (0.7903) time: 0.1471 data: 0.0502 max mem: 9377 +Train: [12] [1500/6250] eta: 0:12:37 lr: 0.000123 grad: 0.1366 (0.1435) loss: 0.7860 (0.7896) time: 0.1526 data: 0.0560 max mem: 9377 +Train: [12] [1600/6250] eta: 0:12:17 lr: 0.000123 grad: 0.1327 (0.1429) loss: 0.7808 (0.7894) time: 0.1512 data: 0.0642 max mem: 9377 +Train: [12] [1700/6250] eta: 0:12:01 lr: 0.000123 grad: 0.1386 (0.1426) loss: 0.7891 (0.7891) time: 0.1600 data: 0.0705 max mem: 9377 +Train: [12] [1800/6250] eta: 0:11:45 lr: 0.000123 grad: 0.1499 (0.1428) loss: 0.7791 (0.7888) time: 0.1481 data: 0.0686 max mem: 9377 +Train: [12] [1900/6250] eta: 0:11:32 lr: 0.000123 grad: 0.1447 (0.1428) loss: 0.7727 (0.7884) time: 0.1301 data: 0.0375 max mem: 9377 +Train: [12] [2000/6250] eta: 0:11:17 lr: 0.000123 grad: 0.1387 (0.1429) loss: 0.7678 (0.7878) time: 0.1527 data: 0.0546 max mem: 9377 +Train: [12] [2100/6250] eta: 0:11:01 lr: 0.000123 grad: 0.1364 (0.1427) loss: 0.7799 (0.7876) time: 0.1836 data: 0.0970 max mem: 9377 +Train: [12] [2200/6250] eta: 0:10:43 lr: 0.000123 grad: 0.1313 (0.1425) loss: 0.7764 (0.7872) time: 0.1531 data: 0.0681 max mem: 9377 +Train: [12] [2300/6250] eta: 0:10:27 lr: 0.000123 grad: 0.1333 (0.1424) loss: 0.7781 (0.7870) time: 0.1561 data: 0.0712 max mem: 9377 +Train: [12] [2400/6250] eta: 0:10:09 lr: 0.000123 grad: 0.1337 (0.1422) loss: 0.7781 (0.7866) time: 0.1345 data: 0.0460 max mem: 9377 +Train: [12] [2500/6250] eta: 0:09:52 lr: 0.000123 grad: 0.1378 (0.1424) loss: 0.7840 (0.7864) time: 0.1659 data: 0.0836 max mem: 9377 +Train: [12] [2600/6250] eta: 0:09:36 lr: 0.000123 grad: 0.1361 (0.1422) loss: 0.7709 (0.7860) time: 0.1387 data: 0.0600 max mem: 9377 +Train: [12] [2700/6250] eta: 0:09:20 lr: 0.000123 grad: 0.1371 (0.1420) loss: 0.7863 (0.7858) time: 0.1701 data: 0.0824 max mem: 9377 +Train: [12] [2800/6250] eta: 0:09:04 lr: 0.000123 grad: 0.1230 (0.1417) loss: 0.7927 (0.7857) time: 0.1860 data: 0.1064 max mem: 9377 +Train: [12] [2900/6250] eta: 0:08:48 lr: 0.000123 grad: 0.1291 (0.1416) loss: 0.7674 (0.7855) time: 0.1777 data: 0.0985 max mem: 9377 +Train: [12] [3000/6250] eta: 0:08:32 lr: 0.000123 grad: 0.1326 (0.1415) loss: 0.7758 (0.7851) time: 0.1692 data: 0.0856 max mem: 9377 +Train: [12] [3100/6250] eta: 0:08:16 lr: 0.000123 grad: 0.1358 (0.1416) loss: 0.7635 (0.7847) time: 0.1544 data: 0.0686 max mem: 9377 +Train: [12] [3200/6250] eta: 0:08:00 lr: 0.000123 grad: 0.1318 (0.1415) loss: 0.7883 (0.7845) time: 0.1612 data: 0.0765 max mem: 9377 +Train: [12] [3300/6250] eta: 0:07:44 lr: 0.000123 grad: 0.1280 (0.1415) loss: 0.7839 (0.7843) time: 0.1418 data: 0.0600 max mem: 9377 +Train: [12] [3400/6250] eta: 0:07:28 lr: 0.000123 grad: 0.1437 (0.1414) loss: 0.7639 (0.7841) time: 0.1512 data: 0.0679 max mem: 9377 +Train: [12] [3500/6250] eta: 0:07:12 lr: 0.000123 grad: 0.1338 (0.1413) loss: 0.7781 (0.7840) time: 0.1547 data: 0.0703 max mem: 9377 +Train: [12] [3600/6250] eta: 0:06:56 lr: 0.000123 grad: 0.1324 (0.1412) loss: 0.7753 (0.7838) time: 0.1593 data: 0.0730 max mem: 9377 +Train: [12] [3700/6250] eta: 0:06:40 lr: 0.000123 grad: 0.1344 (0.1411) loss: 0.7890 (0.7835) time: 0.1380 data: 0.0484 max mem: 9377 +Train: [12] [3800/6250] eta: 0:06:25 lr: 0.000123 grad: 0.1318 (0.1409) loss: 0.7819 (0.7834) time: 0.1440 data: 0.0594 max mem: 9377 +Train: [12] [3900/6250] eta: 0:06:09 lr: 0.000123 grad: 0.1253 (0.1407) loss: 0.7767 (0.7833) time: 0.1687 data: 0.0869 max mem: 9377 +Train: [12] [4000/6250] eta: 0:05:54 lr: 0.000123 grad: 0.1484 (0.1405) loss: 0.7785 (0.7832) time: 0.1702 data: 0.0804 max mem: 9377 +Train: [12] [4100/6250] eta: 0:05:38 lr: 0.000123 grad: 0.1512 (0.1407) loss: 0.7701 (0.7831) time: 0.1587 data: 0.0698 max mem: 9377 +Train: [12] [4200/6250] eta: 0:05:22 lr: 0.000123 grad: 0.1397 (0.1406) loss: 0.7805 (0.7829) time: 0.1548 data: 0.0678 max mem: 9377 +Train: [12] [4300/6250] eta: 0:05:06 lr: 0.000123 grad: 0.1333 (0.1406) loss: 0.7893 (0.7830) time: 0.1716 data: 0.0849 max mem: 9377 +Train: [12] [4400/6250] eta: 0:04:51 lr: 0.000123 grad: 0.1350 (0.1408) loss: 0.7633 (0.7828) time: 0.1759 data: 0.0849 max mem: 9377 +Train: [12] [4500/6250] eta: 0:04:35 lr: 0.000123 grad: 0.1350 (0.1408) loss: 0.7745 (0.7825) time: 0.1821 data: 0.0966 max mem: 9377 +Train: [12] [4600/6250] eta: 0:04:19 lr: 0.000123 grad: 0.1356 (0.1407) loss: 0.7814 (0.7825) time: 0.1420 data: 0.0466 max mem: 9377 +Train: [12] [4700/6250] eta: 0:04:03 lr: 0.000123 grad: 0.1337 (0.1407) loss: 0.7787 (0.7824) time: 0.1620 data: 0.0745 max mem: 9377 +Train: [12] [4800/6250] eta: 0:03:48 lr: 0.000123 grad: 0.1399 (0.1407) loss: 0.7874 (0.7824) time: 0.1547 data: 0.0665 max mem: 9377 +Train: [12] [4900/6250] eta: 0:03:32 lr: 0.000123 grad: 0.1292 (0.1407) loss: 0.7768 (0.7822) time: 0.1837 data: 0.1002 max mem: 9377 +Train: [12] [5000/6250] eta: 0:03:16 lr: 0.000123 grad: 0.1303 (0.1408) loss: 0.7758 (0.7821) time: 0.1650 data: 0.0752 max mem: 9377 +Train: [12] [5100/6250] eta: 0:03:00 lr: 0.000123 grad: 0.1369 (0.1408) loss: 0.7852 (0.7821) time: 0.1655 data: 0.0812 max mem: 9377 +Train: [12] [5200/6250] eta: 0:02:45 lr: 0.000123 grad: 0.1361 (0.1408) loss: 0.7842 (0.7821) time: 0.1947 data: 0.1026 max mem: 9377 +Train: [12] [5300/6250] eta: 0:02:29 lr: 0.000123 grad: 0.1393 (0.1408) loss: 0.7637 (0.7820) time: 0.1595 data: 0.0715 max mem: 9377 +Train: [12] [5400/6250] eta: 0:02:13 lr: 0.000123 grad: 0.1384 (0.1410) loss: 0.7904 (0.7820) time: 0.1631 data: 0.0798 max mem: 9377 +Train: [12] [5500/6250] eta: 0:01:58 lr: 0.000123 grad: 0.1384 (0.1410) loss: 0.7740 (0.7819) time: 0.1509 data: 0.0665 max mem: 9377 +Train: [12] [5600/6250] eta: 0:01:42 lr: 0.000123 grad: 0.1361 (0.1410) loss: 0.7682 (0.7817) time: 0.1442 data: 0.0606 max mem: 9377 +Train: [12] [5700/6250] eta: 0:01:26 lr: 0.000123 grad: 0.1392 (0.1410) loss: 0.7741 (0.7815) time: 0.1580 data: 0.0721 max mem: 9377 +Train: [12] [5800/6250] eta: 0:01:10 lr: 0.000123 grad: 0.1426 (0.1410) loss: 0.7702 (0.7814) time: 0.1591 data: 0.0645 max mem: 9377 +Train: [12] [5900/6250] eta: 0:00:55 lr: 0.000123 grad: 0.1446 (0.1410) loss: 0.7745 (0.7812) time: 0.1608 data: 0.0669 max mem: 9377 +Train: [12] [6000/6250] eta: 0:00:39 lr: 0.000123 grad: 0.1395 (0.1411) loss: 0.7774 (0.7811) time: 0.1446 data: 0.0538 max mem: 9377 +Train: [12] [6100/6250] eta: 0:00:23 lr: 0.000123 grad: 0.1415 (0.1412) loss: 0.7776 (0.7808) time: 0.1600 data: 0.0716 max mem: 9377 +Train: [12] [6200/6250] eta: 0:00:07 lr: 0.000123 grad: 0.1369 (0.1412) loss: 0.7720 (0.7807) time: 0.1526 data: 0.0655 max mem: 9377 +Train: [12] [6249/6250] eta: 0:00:00 lr: 0.000123 grad: 0.1384 (0.1413) loss: 0.7756 (0.7806) time: 0.1735 data: 0.0933 max mem: 9377 +Train: [12] Total time: 0:16:31 (0.1587 s / it) +Averaged stats: lr: 0.000123 grad: 0.1384 (0.1413) loss: 0.7756 (0.7806) +Eval (hcp-train-subset): [12] [ 0/62] eta: 0:05:22 loss: 0.8667 (0.8667) time: 5.1994 data: 5.1693 max mem: 9377 +Eval (hcp-train-subset): [12] [61/62] eta: 0:00:00 loss: 0.8684 (0.8684) time: 0.1374 data: 0.1122 max mem: 9377 +Eval (hcp-train-subset): [12] Total time: 0:00:14 (0.2325 s / it) +Averaged stats (hcp-train-subset): loss: 0.8684 (0.8684) +Eval (hcp-val): [12] [ 0/62] eta: 0:05:14 loss: 0.8634 (0.8634) time: 5.0801 data: 5.0510 max mem: 9377 +Eval (hcp-val): [12] [61/62] eta: 0:00:00 loss: 0.8664 (0.8668) time: 0.1323 data: 0.1069 max mem: 9377 +Eval (hcp-val): [12] Total time: 0:00:14 (0.2351 s / it) +Averaged stats (hcp-val): loss: 0.8664 (0.8668) +Eval (nsd-val): [12] [ 0/62] eta: 0:03:37 loss: 0.8318 (0.8318) time: 3.5018 data: 3.4206 max mem: 9377 +Eval (nsd-val): [12] [61/62] eta: 0:00:00 loss: 0.8443 (0.8429) time: 0.1120 data: 0.0872 max mem: 9377 +Eval (nsd-val): [12] Total time: 0:00:13 (0.2216 s / it) +Averaged stats (nsd-val): loss: 0.8443 (0.8429) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [13] [ 0/6250] eta: 6:24:23 lr: 0.000123 grad: 0.0970 (0.0970) loss: 0.8709 (0.8709) time: 3.6901 data: 3.3970 max mem: 9377 +Train: [13] [ 100/6250] eta: 0:21:36 lr: 0.000123 grad: 0.1957 (0.2085) loss: 0.7764 (0.7994) time: 0.1412 data: 0.0516 max mem: 9377 +Train: [13] [ 200/6250] eta: 0:19:10 lr: 0.000123 grad: 0.1575 (0.1962) loss: 0.7814 (0.7934) time: 0.1594 data: 0.0643 max mem: 9377 +Train: [13] [ 300/6250] eta: 0:17:49 lr: 0.000123 grad: 0.1586 (0.1906) loss: 0.7751 (0.7906) time: 0.1521 data: 0.0585 max mem: 9377 +Train: [13] [ 400/6250] eta: 0:17:03 lr: 0.000123 grad: 0.1603 (0.1834) loss: 0.7713 (0.7883) time: 0.1455 data: 0.0520 max mem: 9377 +Train: [13] [ 500/6250] eta: 0:16:30 lr: 0.000123 grad: 0.1517 (0.1797) loss: 0.7836 (0.7863) time: 0.1654 data: 0.0841 max mem: 9377 +Train: [13] [ 600/6250] eta: 0:15:59 lr: 0.000123 grad: 0.1294 (0.1744) loss: 0.7794 (0.7849) time: 0.1480 data: 0.0521 max mem: 9377 +Train: [13] [ 700/6250] eta: 0:15:35 lr: 0.000123 grad: 0.1529 (0.1717) loss: 0.7675 (0.7834) time: 0.1627 data: 0.0731 max mem: 9377 +Train: [13] [ 800/6250] eta: 0:15:07 lr: 0.000123 grad: 0.1438 (0.1690) loss: 0.7757 (0.7822) time: 0.1575 data: 0.0648 max mem: 9377 +Train: [13] [ 900/6250] eta: 0:14:46 lr: 0.000123 grad: 0.1508 (0.1674) loss: 0.7780 (0.7816) time: 0.1614 data: 0.0791 max mem: 9377 +Train: [13] [1000/6250] eta: 0:14:21 lr: 0.000123 grad: 0.1203 (0.1644) loss: 0.7826 (0.7816) time: 0.1478 data: 0.0626 max mem: 9377 +Train: [13] [1100/6250] eta: 0:13:58 lr: 0.000123 grad: 0.1220 (0.1618) loss: 0.7872 (0.7815) time: 0.1796 data: 0.0815 max mem: 9377 +Train: [13] [1200/6250] eta: 0:13:45 lr: 0.000123 grad: 0.1334 (0.1599) loss: 0.7823 (0.7809) time: 0.1661 data: 0.0630 max mem: 9377 +Train: [13] [1300/6250] eta: 0:13:34 lr: 0.000123 grad: 0.1291 (0.1582) loss: 0.7840 (0.7807) time: 0.1637 data: 0.0606 max mem: 9377 +Train: [13] [1400/6250] eta: 0:13:20 lr: 0.000123 grad: 0.1348 (0.1570) loss: 0.7731 (0.7805) time: 0.1702 data: 0.0811 max mem: 9377 +Train: [13] [1500/6250] eta: 0:13:03 lr: 0.000123 grad: 0.1310 (0.1558) loss: 0.7812 (0.7803) time: 0.1656 data: 0.0691 max mem: 9377 +Train: [13] [1600/6250] eta: 0:12:44 lr: 0.000123 grad: 0.1250 (0.1544) loss: 0.7754 (0.7800) time: 0.1617 data: 0.0759 max mem: 9377 +Train: [13] [1700/6250] eta: 0:12:27 lr: 0.000123 grad: 0.1337 (0.1536) loss: 0.7692 (0.7797) time: 0.1690 data: 0.0817 max mem: 9377 +Train: [13] [1800/6250] eta: 0:12:10 lr: 0.000123 grad: 0.1384 (0.1528) loss: 0.7638 (0.7791) time: 0.1367 data: 0.0500 max mem: 9377 +Train: [13] [1900/6250] eta: 0:11:52 lr: 0.000123 grad: 0.1340 (0.1519) loss: 0.7751 (0.7787) time: 0.1495 data: 0.0644 max mem: 9377 +Train: [13] [2000/6250] eta: 0:11:33 lr: 0.000123 grad: 0.1389 (0.1513) loss: 0.7668 (0.7783) time: 0.1518 data: 0.0568 max mem: 9377 +Train: [13] [2100/6250] eta: 0:11:15 lr: 0.000123 grad: 0.1328 (0.1507) loss: 0.7703 (0.7781) time: 0.1455 data: 0.0610 max mem: 9377 +Train: [13] [2200/6250] eta: 0:10:58 lr: 0.000123 grad: 0.1286 (0.1501) loss: 0.7681 (0.7776) time: 0.1687 data: 0.0786 max mem: 9377 +Train: [13] [2300/6250] eta: 0:10:42 lr: 0.000123 grad: 0.1386 (0.1498) loss: 0.7745 (0.7773) time: 0.1201 data: 0.0153 max mem: 9377 +Train: [13] [2400/6250] eta: 0:10:25 lr: 0.000123 grad: 0.1414 (0.1493) loss: 0.7653 (0.7769) time: 0.1458 data: 0.0607 max mem: 9377 +Train: [13] [2500/6250] eta: 0:10:09 lr: 0.000123 grad: 0.1369 (0.1488) loss: 0.7705 (0.7766) time: 0.1602 data: 0.0789 max mem: 9377 +Train: [13] [2600/6250] eta: 0:09:51 lr: 0.000123 grad: 0.1506 (0.1485) loss: 0.7556 (0.7762) time: 0.1610 data: 0.0808 max mem: 9377 +Train: [13] [2700/6250] eta: 0:09:33 lr: 0.000123 grad: 0.1280 (0.1479) loss: 0.7774 (0.7759) time: 0.1571 data: 0.0692 max mem: 9377 +Train: [13] [2800/6250] eta: 0:09:16 lr: 0.000123 grad: 0.1412 (0.1478) loss: 0.7645 (0.7755) time: 0.1267 data: 0.0376 max mem: 9377 +Train: [13] [2900/6250] eta: 0:08:58 lr: 0.000123 grad: 0.1272 (0.1474) loss: 0.7716 (0.7754) time: 0.1605 data: 0.0730 max mem: 9377 +Train: [13] [3000/6250] eta: 0:08:41 lr: 0.000123 grad: 0.1360 (0.1473) loss: 0.7732 (0.7752) time: 0.1281 data: 0.0324 max mem: 9377 +Train: [13] [3100/6250] eta: 0:08:24 lr: 0.000123 grad: 0.1317 (0.1469) loss: 0.7703 (0.7750) time: 0.1503 data: 0.0682 max mem: 9377 +Train: [13] [3200/6250] eta: 0:08:08 lr: 0.000123 grad: 0.1404 (0.1468) loss: 0.7623 (0.7748) time: 0.1471 data: 0.0536 max mem: 9377 +Train: [13] [3300/6250] eta: 0:07:52 lr: 0.000123 grad: 0.1521 (0.1467) loss: 0.7689 (0.7747) time: 0.1625 data: 0.0706 max mem: 9377 +Train: [13] [3400/6250] eta: 0:07:37 lr: 0.000123 grad: 0.1513 (0.1468) loss: 0.7680 (0.7747) time: 0.1908 data: 0.1064 max mem: 9377 +Train: [13] [3500/6250] eta: 0:07:20 lr: 0.000123 grad: 0.1458 (0.1466) loss: 0.7759 (0.7747) time: 0.1565 data: 0.0633 max mem: 9377 +Train: [13] [3600/6250] eta: 0:07:05 lr: 0.000123 grad: 0.1364 (0.1465) loss: 0.7773 (0.7747) time: 0.1673 data: 0.0657 max mem: 9377 +Train: [13] [3700/6250] eta: 0:06:48 lr: 0.000122 grad: 0.1374 (0.1463) loss: 0.7720 (0.7745) time: 0.1538 data: 0.0629 max mem: 9377 +Train: [13] [3800/6250] eta: 0:06:33 lr: 0.000122 grad: 0.1308 (0.1461) loss: 0.7677 (0.7744) time: 0.1557 data: 0.0760 max mem: 9377 +Train: [13] [3900/6250] eta: 0:06:16 lr: 0.000122 grad: 0.1440 (0.1463) loss: 0.7815 (0.7743) time: 0.1401 data: 0.0406 max mem: 9377 +Train: [13] [4000/6250] eta: 0:06:00 lr: 0.000122 grad: 0.1438 (0.1461) loss: 0.7657 (0.7742) time: 0.1535 data: 0.0601 max mem: 9377 +Train: [13] [4100/6250] eta: 0:05:44 lr: 0.000122 grad: 0.1368 (0.1460) loss: 0.7618 (0.7741) time: 0.1983 data: 0.1026 max mem: 9377 +Train: [13] [4200/6250] eta: 0:05:28 lr: 0.000122 grad: 0.1410 (0.1459) loss: 0.7686 (0.7740) time: 0.1563 data: 0.0685 max mem: 9377 +Train: [13] [4300/6250] eta: 0:05:12 lr: 0.000122 grad: 0.1477 (0.1458) loss: 0.7818 (0.7740) time: 0.1695 data: 0.0872 max mem: 9377 +Train: [13] [4400/6250] eta: 0:04:56 lr: 0.000122 grad: 0.1381 (0.1457) loss: 0.7752 (0.7741) time: 0.1544 data: 0.0653 max mem: 9377 +Train: [13] [4500/6250] eta: 0:04:40 lr: 0.000122 grad: 0.1322 (0.1458) loss: 0.7698 (0.7742) time: 0.1703 data: 0.0813 max mem: 9377 +Train: [13] [4600/6250] eta: 0:04:23 lr: 0.000122 grad: 0.1359 (0.1455) loss: 0.7755 (0.7743) time: 0.1811 data: 0.0874 max mem: 9377 +Train: [13] [4700/6250] eta: 0:04:07 lr: 0.000122 grad: 0.1352 (0.1455) loss: 0.7835 (0.7744) time: 0.1550 data: 0.0748 max mem: 9377 +Train: [13] [4800/6250] eta: 0:03:51 lr: 0.000122 grad: 0.1369 (0.1455) loss: 0.7805 (0.7744) time: 0.1481 data: 0.0658 max mem: 9377 +Train: [13] [4900/6250] eta: 0:03:35 lr: 0.000122 grad: 0.1350 (0.1455) loss: 0.7757 (0.7744) time: 0.1252 data: 0.0361 max mem: 9377 +Train: [13] [5000/6250] eta: 0:03:19 lr: 0.000122 grad: 0.1356 (0.1453) loss: 0.7756 (0.7744) time: 0.1555 data: 0.0639 max mem: 9377 +Train: [13] [5100/6250] eta: 0:03:03 lr: 0.000122 grad: 0.1366 (0.1452) loss: 0.7778 (0.7745) time: 0.1715 data: 0.0845 max mem: 9377 +Train: [13] [5200/6250] eta: 0:02:47 lr: 0.000122 grad: 0.1346 (0.1451) loss: 0.7704 (0.7744) time: 0.1477 data: 0.0486 max mem: 9377 +Train: [13] [5300/6250] eta: 0:02:31 lr: 0.000122 grad: 0.1343 (0.1451) loss: 0.7817 (0.7744) time: 0.1504 data: 0.0615 max mem: 9377 +Train: [13] [5400/6250] eta: 0:02:15 lr: 0.000122 grad: 0.1391 (0.1450) loss: 0.7835 (0.7745) time: 0.1424 data: 0.0515 max mem: 9377 +Train: [13] [5500/6250] eta: 0:01:59 lr: 0.000122 grad: 0.1269 (0.1448) loss: 0.7878 (0.7746) time: 0.1521 data: 0.0609 max mem: 9377 +Train: [13] [5600/6250] eta: 0:01:43 lr: 0.000122 grad: 0.1388 (0.1447) loss: 0.7705 (0.7747) time: 0.1733 data: 0.0855 max mem: 9377 +Train: [13] [5700/6250] eta: 0:01:27 lr: 0.000122 grad: 0.1346 (0.1446) loss: 0.7857 (0.7747) time: 0.1413 data: 0.0556 max mem: 9377 +Train: [13] [5800/6250] eta: 0:01:11 lr: 0.000122 grad: 0.1299 (0.1445) loss: 0.7692 (0.7747) time: 0.2099 data: 0.1248 max mem: 9377 +Train: [13] [5900/6250] eta: 0:00:55 lr: 0.000122 grad: 0.1307 (0.1444) loss: 0.7741 (0.7747) time: 0.1514 data: 0.0716 max mem: 9377 +Train: [13] [6000/6250] eta: 0:00:39 lr: 0.000122 grad: 0.1333 (0.1444) loss: 0.7774 (0.7746) time: 0.1400 data: 0.0480 max mem: 9377 +Train: [13] [6100/6250] eta: 0:00:23 lr: 0.000122 grad: 0.1301 (0.1444) loss: 0.7844 (0.7747) time: 0.1092 data: 0.0085 max mem: 9377 +Train: [13] [6200/6250] eta: 0:00:07 lr: 0.000122 grad: 0.1376 (0.1443) loss: 0.7771 (0.7747) time: 0.1547 data: 0.0679 max mem: 9377 +Train: [13] [6249/6250] eta: 0:00:00 lr: 0.000122 grad: 0.1322 (0.1442) loss: 0.7693 (0.7747) time: 0.1743 data: 0.0870 max mem: 9377 +Train: [13] Total time: 0:16:39 (0.1599 s / it) +Averaged stats: lr: 0.000122 grad: 0.1322 (0.1442) loss: 0.7693 (0.7747) +Eval (hcp-train-subset): [13] [ 0/62] eta: 0:06:20 loss: 0.8647 (0.8647) time: 6.1427 data: 6.1133 max mem: 9377 +Eval (hcp-train-subset): [13] [61/62] eta: 0:00:00 loss: 0.8689 (0.8682) time: 0.1255 data: 0.0986 max mem: 9377 +Eval (hcp-train-subset): [13] Total time: 0:00:14 (0.2379 s / it) +Averaged stats (hcp-train-subset): loss: 0.8689 (0.8682) +Eval (hcp-val): [13] [ 0/62] eta: 0:05:19 loss: 0.8644 (0.8644) time: 5.1535 data: 5.1242 max mem: 9377 +Eval (hcp-val): [13] [61/62] eta: 0:00:00 loss: 0.8666 (0.8667) time: 0.1429 data: 0.1176 max mem: 9377 +Eval (hcp-val): [13] Total time: 0:00:14 (0.2301 s / it) +Averaged stats (hcp-val): loss: 0.8666 (0.8667) +Eval (nsd-val): [13] [ 0/62] eta: 0:04:35 loss: 0.8376 (0.8376) time: 4.4388 data: 4.3963 max mem: 9377 +Eval (nsd-val): [13] [61/62] eta: 0:00:00 loss: 0.8478 (0.8469) time: 0.1348 data: 0.1096 max mem: 9377 +Eval (nsd-val): [13] Total time: 0:00:13 (0.2236 s / it) +Averaged stats (nsd-val): loss: 0.8478 (0.8469) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [14] [ 0/6250] eta: 9:20:26 lr: 0.000122 grad: 0.2141 (0.2141) loss: 0.8643 (0.8643) time: 5.3802 data: 5.2800 max mem: 9377 +Train: [14] [ 100/6250] eta: 0:21:02 lr: 0.000122 grad: 0.2389 (0.2696) loss: 0.7741 (0.8014) time: 0.1520 data: 0.0569 max mem: 9377 +Train: [14] [ 200/6250] eta: 0:18:46 lr: 0.000122 grad: 0.1894 (0.2442) loss: 0.7600 (0.7811) time: 0.1757 data: 0.0740 max mem: 9377 +Train: [14] [ 300/6250] eta: 0:17:39 lr: 0.000122 grad: 0.1753 (0.2299) loss: 0.7575 (0.7735) time: 0.1760 data: 0.0900 max mem: 9377 +Train: [14] [ 400/6250] eta: 0:16:56 lr: 0.000122 grad: 0.1710 (0.2165) loss: 0.7475 (0.7688) time: 0.1549 data: 0.0678 max mem: 9377 +Train: [14] [ 500/6250] eta: 0:16:43 lr: 0.000122 grad: 0.1481 (0.2066) loss: 0.7581 (0.7652) time: 0.1868 data: 0.0945 max mem: 9377 +Train: [14] [ 600/6250] eta: 0:16:39 lr: 0.000122 grad: 0.1549 (0.1979) loss: 0.7544 (0.7642) time: 0.1752 data: 0.0786 max mem: 9377 +Train: [14] [ 700/6250] eta: 0:16:28 lr: 0.000122 grad: 0.1429 (0.1907) loss: 0.7697 (0.7640) time: 0.1701 data: 0.0807 max mem: 9377 +Train: [14] [ 800/6250] eta: 0:16:14 lr: 0.000122 grad: 0.1483 (0.1859) loss: 0.7566 (0.7629) time: 0.1872 data: 0.1030 max mem: 9377 +Train: [14] [ 900/6250] eta: 0:15:51 lr: 0.000122 grad: 0.1532 (0.1819) loss: 0.7682 (0.7622) time: 0.1719 data: 0.0816 max mem: 9377 +Train: [14] [1000/6250] eta: 0:15:32 lr: 0.000122 grad: 0.1428 (0.1781) loss: 0.7726 (0.7624) time: 0.1883 data: 0.1007 max mem: 9377 +Train: [14] [1100/6250] eta: 0:15:08 lr: 0.000122 grad: 0.1379 (0.1749) loss: 0.7647 (0.7623) time: 0.1328 data: 0.0547 max mem: 9377 +Train: [14] [1200/6250] eta: 0:14:50 lr: 0.000122 grad: 0.1372 (0.1724) loss: 0.7760 (0.7623) time: 0.1823 data: 0.0826 max mem: 9377 +Train: [14] [1300/6250] eta: 0:14:32 lr: 0.000122 grad: 0.1457 (0.1700) loss: 0.7527 (0.7626) time: 0.1848 data: 0.0804 max mem: 9377 +Train: [14] [1400/6250] eta: 0:14:12 lr: 0.000122 grad: 0.1384 (0.1683) loss: 0.7512 (0.7624) time: 0.1731 data: 0.0746 max mem: 9377 +Train: [14] [1500/6250] eta: 0:13:49 lr: 0.000122 grad: 0.1409 (0.1671) loss: 0.7797 (0.7623) time: 0.1518 data: 0.0574 max mem: 9377 +Train: [14] [1600/6250] eta: 0:13:28 lr: 0.000122 grad: 0.1351 (0.1659) loss: 0.7654 (0.7624) time: 0.1564 data: 0.0628 max mem: 9377 +Train: [14] [1700/6250] eta: 0:13:09 lr: 0.000122 grad: 0.1399 (0.1643) loss: 0.7566 (0.7624) time: 0.1189 data: 0.0341 max mem: 9377 +Train: [14] [1800/6250] eta: 0:12:56 lr: 0.000122 grad: 0.1387 (0.1631) loss: 0.7656 (0.7624) time: 0.3134 data: 0.2352 max mem: 9377 +Train: [14] [1900/6250] eta: 0:12:33 lr: 0.000122 grad: 0.1358 (0.1624) loss: 0.7632 (0.7621) time: 0.1510 data: 0.0722 max mem: 9377 +Train: [14] [2000/6250] eta: 0:12:14 lr: 0.000122 grad: 0.1309 (0.1616) loss: 0.7655 (0.7620) time: 0.1551 data: 0.0777 max mem: 9377 +Train: [14] [2100/6250] eta: 0:11:58 lr: 0.000122 grad: 0.1422 (0.1608) loss: 0.7547 (0.7617) time: 0.2231 data: 0.1418 max mem: 9377 +Train: [14] [2200/6250] eta: 0:11:39 lr: 0.000122 grad: 0.1392 (0.1601) loss: 0.7540 (0.7616) time: 0.1671 data: 0.0797 max mem: 9377 +Train: [14] [2300/6250] eta: 0:11:19 lr: 0.000122 grad: 0.1372 (0.1596) loss: 0.7490 (0.7615) time: 0.1924 data: 0.1086 max mem: 9377 +Train: [14] [2400/6250] eta: 0:10:59 lr: 0.000122 grad: 0.1334 (0.1589) loss: 0.7693 (0.7616) time: 0.1711 data: 0.0883 max mem: 9377 +Train: [14] [2500/6250] eta: 0:10:37 lr: 0.000122 grad: 0.1576 (0.1585) loss: 0.7502 (0.7615) time: 0.1227 data: 0.0377 max mem: 9377 +Train: [14] [2600/6250] eta: 0:10:19 lr: 0.000122 grad: 0.1416 (0.1579) loss: 0.7672 (0.7615) time: 0.1532 data: 0.0670 max mem: 9377 +Train: [14] [2700/6250] eta: 0:10:00 lr: 0.000122 grad: 0.1402 (0.1574) loss: 0.7498 (0.7615) time: 0.1603 data: 0.0703 max mem: 9377 +Train: [14] [2800/6250] eta: 0:09:41 lr: 0.000122 grad: 0.1418 (0.1569) loss: 0.7533 (0.7615) time: 0.1420 data: 0.0493 max mem: 9377 +Train: [14] [2900/6250] eta: 0:09:22 lr: 0.000122 grad: 0.1408 (0.1564) loss: 0.7540 (0.7615) time: 0.1597 data: 0.0752 max mem: 9377 +Train: [14] [3000/6250] eta: 0:09:04 lr: 0.000122 grad: 0.1305 (0.1558) loss: 0.7814 (0.7617) time: 0.1696 data: 0.0871 max mem: 9377 +Train: [14] [3100/6250] eta: 0:08:47 lr: 0.000122 grad: 0.1417 (0.1557) loss: 0.7642 (0.7619) time: 0.1665 data: 0.0737 max mem: 9377 +Train: [14] [3200/6250] eta: 0:08:30 lr: 0.000122 grad: 0.1408 (0.1552) loss: 0.7688 (0.7621) time: 0.1717 data: 0.0864 max mem: 9377 +Train: [14] [3300/6250] eta: 0:08:12 lr: 0.000122 grad: 0.1417 (0.1550) loss: 0.7721 (0.7623) time: 0.1565 data: 0.0716 max mem: 9377 +Train: [14] [3400/6250] eta: 0:07:54 lr: 0.000122 grad: 0.1422 (0.1549) loss: 0.7544 (0.7623) time: 0.1282 data: 0.0428 max mem: 9377 +Train: [14] [3500/6250] eta: 0:07:37 lr: 0.000122 grad: 0.1510 (0.1549) loss: 0.7642 (0.7624) time: 0.1521 data: 0.0680 max mem: 9377 +Train: [14] [3600/6250] eta: 0:07:19 lr: 0.000122 grad: 0.1552 (0.1546) loss: 0.7536 (0.7625) time: 0.1488 data: 0.0514 max mem: 9377 +Train: [14] [3700/6250] eta: 0:07:02 lr: 0.000122 grad: 0.1363 (0.1544) loss: 0.7723 (0.7626) time: 0.1755 data: 0.0841 max mem: 9377 +Train: [14] [3800/6250] eta: 0:06:45 lr: 0.000122 grad: 0.1447 (0.1541) loss: 0.7602 (0.7626) time: 0.1373 data: 0.0454 max mem: 9377 +Train: [14] [3900/6250] eta: 0:06:27 lr: 0.000122 grad: 0.1414 (0.1540) loss: 0.7508 (0.7626) time: 0.1540 data: 0.0623 max mem: 9377 +Train: [14] [4000/6250] eta: 0:06:10 lr: 0.000122 grad: 0.1383 (0.1538) loss: 0.7633 (0.7626) time: 0.1705 data: 0.0821 max mem: 9377 +Train: [14] [4100/6250] eta: 0:05:54 lr: 0.000122 grad: 0.1369 (0.1536) loss: 0.7506 (0.7626) time: 0.1712 data: 0.0768 max mem: 9377 +Train: [14] [4200/6250] eta: 0:05:37 lr: 0.000122 grad: 0.1332 (0.1533) loss: 0.7623 (0.7625) time: 0.1516 data: 0.0635 max mem: 9377 +Train: [14] [4300/6250] eta: 0:05:20 lr: 0.000122 grad: 0.1338 (0.1533) loss: 0.7539 (0.7624) time: 0.1615 data: 0.0749 max mem: 9377 +Train: [14] [4400/6250] eta: 0:05:03 lr: 0.000122 grad: 0.1410 (0.1531) loss: 0.7594 (0.7624) time: 0.1409 data: 0.0424 max mem: 9377 +Train: [14] [4500/6250] eta: 0:04:46 lr: 0.000122 grad: 0.1421 (0.1529) loss: 0.7602 (0.7623) time: 0.1710 data: 0.0821 max mem: 9377 +Train: [14] [4600/6250] eta: 0:04:29 lr: 0.000122 grad: 0.1418 (0.1528) loss: 0.7527 (0.7622) time: 0.1688 data: 0.0806 max mem: 9377 +Train: [14] [4700/6250] eta: 0:04:12 lr: 0.000122 grad: 0.1502 (0.1526) loss: 0.7532 (0.7621) time: 0.1447 data: 0.0610 max mem: 9377 +Train: [14] [4800/6250] eta: 0:03:56 lr: 0.000122 grad: 0.1303 (0.1524) loss: 0.7755 (0.7620) time: 0.1458 data: 0.0490 max mem: 9377 +Train: [14] [4900/6250] eta: 0:03:39 lr: 0.000122 grad: 0.1440 (0.1522) loss: 0.7469 (0.7621) time: 0.1570 data: 0.0693 max mem: 9377 +Train: [14] [5000/6250] eta: 0:03:23 lr: 0.000122 grad: 0.1496 (0.1520) loss: 0.7543 (0.7621) time: 0.1731 data: 0.0902 max mem: 9377 +Train: [14] [5100/6250] eta: 0:03:06 lr: 0.000122 grad: 0.1360 (0.1519) loss: 0.7629 (0.7622) time: 0.1522 data: 0.0631 max mem: 9377 +Train: [14] [5200/6250] eta: 0:02:50 lr: 0.000122 grad: 0.1350 (0.1516) loss: 0.7592 (0.7623) time: 0.1436 data: 0.0516 max mem: 9377 +Train: [14] [5300/6250] eta: 0:02:33 lr: 0.000122 grad: 0.1465 (0.1514) loss: 0.7599 (0.7623) time: 0.1575 data: 0.0681 max mem: 9377 +Train: [14] [5400/6250] eta: 0:02:17 lr: 0.000122 grad: 0.1252 (0.1512) loss: 0.7738 (0.7625) time: 0.1598 data: 0.0762 max mem: 9377 +Train: [14] [5500/6250] eta: 0:02:01 lr: 0.000122 grad: 0.1334 (0.1510) loss: 0.7583 (0.7625) time: 0.1796 data: 0.0892 max mem: 9377 +Train: [14] [5600/6250] eta: 0:01:45 lr: 0.000122 grad: 0.1360 (0.1508) loss: 0.7822 (0.7627) time: 0.1639 data: 0.0770 max mem: 9377 +Train: [14] [5700/6250] eta: 0:01:28 lr: 0.000122 grad: 0.1379 (0.1505) loss: 0.7733 (0.7628) time: 0.1555 data: 0.0720 max mem: 9377 +Train: [14] [5800/6250] eta: 0:01:12 lr: 0.000122 grad: 0.1343 (0.1504) loss: 0.7632 (0.7629) time: 0.1676 data: 0.0820 max mem: 9377 +Train: [14] [5900/6250] eta: 0:00:56 lr: 0.000122 grad: 0.1381 (0.1501) loss: 0.7699 (0.7630) time: 0.1281 data: 0.0364 max mem: 9377 +Train: [14] [6000/6250] eta: 0:00:40 lr: 0.000122 grad: 0.1342 (0.1499) loss: 0.7698 (0.7630) time: 0.1234 data: 0.0441 max mem: 9377 +Train: [14] [6100/6250] eta: 0:00:24 lr: 0.000122 grad: 0.1435 (0.1498) loss: 0.7596 (0.7630) time: 0.1534 data: 0.0610 max mem: 9377 +Train: [14] [6200/6250] eta: 0:00:08 lr: 0.000122 grad: 0.1389 (0.1496) loss: 0.7693 (0.7631) time: 0.1388 data: 0.0419 max mem: 9377 +Train: [14] [6249/6250] eta: 0:00:00 lr: 0.000122 grad: 0.1430 (0.1496) loss: 0.7455 (0.7630) time: 0.1533 data: 0.0620 max mem: 9377 +Train: [14] Total time: 0:16:52 (0.1620 s / it) +Averaged stats: lr: 0.000122 grad: 0.1430 (0.1496) loss: 0.7455 (0.7630) +Eval (hcp-train-subset): [14] [ 0/62] eta: 0:04:23 loss: 0.8651 (0.8651) time: 4.2498 data: 4.1582 max mem: 9377 +Eval (hcp-train-subset): [14] [61/62] eta: 0:00:00 loss: 0.8680 (0.8711) time: 0.1159 data: 0.0909 max mem: 9377 +Eval (hcp-train-subset): [14] Total time: 0:00:15 (0.2459 s / it) +Averaged stats (hcp-train-subset): loss: 0.8680 (0.8711) +Making plots (hcp-train-subset): example=20 +Eval (hcp-val): [14] [ 0/62] eta: 0:04:55 loss: 0.8643 (0.8643) time: 4.7616 data: 4.7304 max mem: 9377 +Eval (hcp-val): [14] [61/62] eta: 0:00:00 loss: 0.8669 (0.8692) time: 0.1703 data: 0.1450 max mem: 9377 +Eval (hcp-val): [14] Total time: 0:00:14 (0.2409 s / it) +Averaged stats (hcp-val): loss: 0.8669 (0.8692) +Making plots (hcp-val): example=24 +Eval (nsd-val): [14] [ 0/62] eta: 0:04:33 loss: 0.8400 (0.8400) time: 4.4064 data: 4.3219 max mem: 9377 +Eval (nsd-val): [14] [61/62] eta: 0:00:00 loss: 0.8500 (0.8508) time: 0.1719 data: 0.1459 max mem: 9377 +Eval (nsd-val): [14] Total time: 0:00:15 (0.2569 s / it) +Averaged stats (nsd-val): loss: 0.8500 (0.8508) +Making plots (nsd-val): example=6 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-00014.pth +Train: [15] [ 0/6250] eta: 11:08:03 lr: 0.000122 grad: 0.1596 (0.1596) loss: 0.7960 (0.7960) time: 6.4133 data: 6.2986 max mem: 9377 +Train: [15] [ 100/6250] eta: 0:24:43 lr: 0.000122 grad: 0.2008 (0.2299) loss: 0.7837 (0.7992) time: 0.2315 data: 0.1372 max mem: 9377 +Train: [15] [ 200/6250] eta: 0:21:57 lr: 0.000122 grad: 0.1735 (0.2071) loss: 0.7740 (0.7943) time: 0.2204 data: 0.1221 max mem: 9377 +Train: [15] [ 300/6250] eta: 0:20:37 lr: 0.000122 grad: 0.1761 (0.2020) loss: 0.7592 (0.7862) time: 0.2075 data: 0.1033 max mem: 9377 +Train: [15] [ 400/6250] eta: 0:19:38 lr: 0.000122 grad: 0.1556 (0.1934) loss: 0.7727 (0.7807) time: 0.1892 data: 0.0780 max mem: 9377 +Train: [15] [ 500/6250] eta: 0:18:50 lr: 0.000122 grad: 0.1439 (0.1854) loss: 0.7475 (0.7770) time: 0.1610 data: 0.0744 max mem: 9377 +Train: [15] [ 600/6250] eta: 0:18:11 lr: 0.000122 grad: 0.1444 (0.1801) loss: 0.7638 (0.7740) time: 0.1777 data: 0.0852 max mem: 9377 +Train: [15] [ 700/6250] eta: 0:17:32 lr: 0.000122 grad: 0.1305 (0.1753) loss: 0.7467 (0.7721) time: 0.1744 data: 0.0788 max mem: 9377 +Train: [15] [ 800/6250] eta: 0:17:02 lr: 0.000122 grad: 0.1469 (0.1717) loss: 0.7565 (0.7705) time: 0.1827 data: 0.0892 max mem: 9377 +Train: [15] [ 900/6250] eta: 0:16:25 lr: 0.000122 grad: 0.1425 (0.1689) loss: 0.7555 (0.7696) time: 0.1685 data: 0.0839 max mem: 9377 +Train: [15] [1000/6250] eta: 0:15:53 lr: 0.000122 grad: 0.1419 (0.1660) loss: 0.7383 (0.7685) time: 0.1531 data: 0.0552 max mem: 9377 +Train: [15] [1100/6250] eta: 0:15:24 lr: 0.000121 grad: 0.1380 (0.1639) loss: 0.7604 (0.7677) time: 0.1729 data: 0.0758 max mem: 9377 +Train: [15] [1200/6250] eta: 0:14:49 lr: 0.000121 grad: 0.1322 (0.1620) loss: 0.7518 (0.7667) time: 0.1374 data: 0.0472 max mem: 9377 +Train: [15] [1300/6250] eta: 0:14:21 lr: 0.000121 grad: 0.1389 (0.1608) loss: 0.7577 (0.7658) time: 0.1679 data: 0.0751 max mem: 9377 +Train: [15] [1400/6250] eta: 0:13:57 lr: 0.000121 grad: 0.1373 (0.1600) loss: 0.7581 (0.7651) time: 0.1502 data: 0.0588 max mem: 9377 +Train: [15] [1500/6250] eta: 0:13:35 lr: 0.000121 grad: 0.1482 (0.1592) loss: 0.7622 (0.7644) time: 0.1701 data: 0.0677 max mem: 9377 +Train: [15] [1600/6250] eta: 0:13:10 lr: 0.000121 grad: 0.1386 (0.1585) loss: 0.7597 (0.7639) time: 0.1397 data: 0.0512 max mem: 9377 +Train: [15] [1700/6250] eta: 0:12:48 lr: 0.000121 grad: 0.1456 (0.1583) loss: 0.7684 (0.7637) time: 0.1461 data: 0.0483 max mem: 9377 +Train: [15] [1800/6250] eta: 0:12:27 lr: 0.000121 grad: 0.1528 (0.1577) loss: 0.7587 (0.7634) time: 0.1501 data: 0.0677 max mem: 9377 +Train: [15] [1900/6250] eta: 0:12:06 lr: 0.000121 grad: 0.1419 (0.1573) loss: 0.7525 (0.7629) time: 0.1627 data: 0.0775 max mem: 9377 +Train: [15] [2000/6250] eta: 0:11:47 lr: 0.000121 grad: 0.1502 (0.1572) loss: 0.7494 (0.7623) time: 0.1660 data: 0.0883 max mem: 9377 +Train: [15] [2100/6250] eta: 0:11:28 lr: 0.000121 grad: 0.1434 (0.1567) loss: 0.7467 (0.7617) time: 0.1699 data: 0.0800 max mem: 9377 +Train: [15] [2200/6250] eta: 0:11:07 lr: 0.000121 grad: 0.1469 (0.1566) loss: 0.7520 (0.7613) time: 0.1593 data: 0.0742 max mem: 9377 +Train: [15] [2300/6250] eta: 0:10:50 lr: 0.000121 grad: 0.1421 (0.1567) loss: 0.7530 (0.7608) time: 0.1613 data: 0.0783 max mem: 9377 +Train: [15] [2400/6250] eta: 0:10:32 lr: 0.000121 grad: 0.1472 (0.1563) loss: 0.7421 (0.7602) time: 0.1519 data: 0.0645 max mem: 9377 +Train: [15] [2500/6250] eta: 0:10:16 lr: 0.000121 grad: 0.1496 (0.1560) loss: 0.7496 (0.7598) time: 0.1530 data: 0.0583 max mem: 9377 +Train: [15] [2600/6250] eta: 0:09:59 lr: 0.000121 grad: 0.1494 (0.1557) loss: 0.7428 (0.7593) time: 0.1642 data: 0.0776 max mem: 9377 +Train: [15] [2700/6250] eta: 0:09:43 lr: 0.000121 grad: 0.1597 (0.1555) loss: 0.7196 (0.7586) time: 0.1385 data: 0.0538 max mem: 9377 +Train: [15] [2800/6250] eta: 0:09:25 lr: 0.000121 grad: 0.1450 (0.1553) loss: 0.7499 (0.7581) time: 0.1450 data: 0.0554 max mem: 9377 +Train: [15] [2900/6250] eta: 0:09:08 lr: 0.000121 grad: 0.1499 (0.1550) loss: 0.7437 (0.7578) time: 0.1363 data: 0.0430 max mem: 9377 +Train: [15] [3000/6250] eta: 0:08:51 lr: 0.000121 grad: 0.1433 (0.1547) loss: 0.7558 (0.7576) time: 0.1690 data: 0.0938 max mem: 9377 +Train: [15] [3100/6250] eta: 0:08:33 lr: 0.000121 grad: 0.1369 (0.1544) loss: 0.7478 (0.7576) time: 0.1661 data: 0.0759 max mem: 9377 +Train: [15] [3200/6250] eta: 0:08:16 lr: 0.000121 grad: 0.1364 (0.1541) loss: 0.7508 (0.7576) time: 0.1547 data: 0.0634 max mem: 9377 +Train: [15] [3300/6250] eta: 0:08:00 lr: 0.000121 grad: 0.1360 (0.1538) loss: 0.7584 (0.7576) time: 0.1832 data: 0.1032 max mem: 9377 +Train: [15] [3400/6250] eta: 0:07:42 lr: 0.000121 grad: 0.1423 (0.1536) loss: 0.7689 (0.7576) time: 0.1321 data: 0.0421 max mem: 9377 +Train: [15] [3500/6250] eta: 0:07:26 lr: 0.000121 grad: 0.1342 (0.1532) loss: 0.7594 (0.7578) time: 0.1527 data: 0.0656 max mem: 9377 +Train: [15] [3600/6250] eta: 0:07:09 lr: 0.000121 grad: 0.1505 (0.1528) loss: 0.7382 (0.7579) time: 0.1672 data: 0.0832 max mem: 9377 +Train: [15] [3700/6250] eta: 0:06:53 lr: 0.000121 grad: 0.1359 (0.1526) loss: 0.7708 (0.7579) time: 0.1569 data: 0.0793 max mem: 9377 +Train: [15] [3800/6250] eta: 0:06:36 lr: 0.000121 grad: 0.1451 (0.1523) loss: 0.7585 (0.7579) time: 0.1412 data: 0.0529 max mem: 9377 +Train: [15] [3900/6250] eta: 0:06:19 lr: 0.000121 grad: 0.1423 (0.1524) loss: 0.7442 (0.7578) time: 0.1595 data: 0.0688 max mem: 9377 +Train: [15] [4000/6250] eta: 0:06:03 lr: 0.000121 grad: 0.1400 (0.1521) loss: 0.7536 (0.7577) time: 0.1685 data: 0.0886 max mem: 9377 +Train: [15] [4100/6250] eta: 0:05:47 lr: 0.000121 grad: 0.1408 (0.1520) loss: 0.7511 (0.7577) time: 0.1532 data: 0.0686 max mem: 9377 +Train: [15] [4200/6250] eta: 0:05:30 lr: 0.000121 grad: 0.1359 (0.1517) loss: 0.7388 (0.7577) time: 0.1698 data: 0.0872 max mem: 9377 +Train: [15] [4300/6250] eta: 0:05:14 lr: 0.000121 grad: 0.1376 (0.1514) loss: 0.7621 (0.7577) time: 0.1695 data: 0.0688 max mem: 9377 +Train: [15] [4400/6250] eta: 0:04:57 lr: 0.000121 grad: 0.1346 (0.1511) loss: 0.7667 (0.7579) time: 0.1622 data: 0.0770 max mem: 9377 +Train: [15] [4500/6250] eta: 0:04:41 lr: 0.000121 grad: 0.1447 (0.1509) loss: 0.7597 (0.7579) time: 0.1390 data: 0.0450 max mem: 9377 +Train: [15] [4600/6250] eta: 0:04:25 lr: 0.000121 grad: 0.1424 (0.1508) loss: 0.7537 (0.7579) time: 0.1829 data: 0.0960 max mem: 9377 +Train: [15] [4700/6250] eta: 0:04:09 lr: 0.000121 grad: 0.1509 (0.1508) loss: 0.7531 (0.7578) time: 0.1521 data: 0.0666 max mem: 9377 +Train: [15] [4800/6250] eta: 0:03:53 lr: 0.000121 grad: 0.1400 (0.1507) loss: 0.7578 (0.7577) time: 0.1356 data: 0.0476 max mem: 9377 +Train: [15] [4900/6250] eta: 0:03:36 lr: 0.000121 grad: 0.1322 (0.1506) loss: 0.7646 (0.7576) time: 0.1604 data: 0.0772 max mem: 9377 +Train: [15] [5000/6250] eta: 0:03:20 lr: 0.000121 grad: 0.1427 (0.1504) loss: 0.7462 (0.7576) time: 0.1626 data: 0.0735 max mem: 9377 +Train: [15] [5100/6250] eta: 0:03:04 lr: 0.000121 grad: 0.1448 (0.1504) loss: 0.7579 (0.7575) time: 0.1474 data: 0.0601 max mem: 9377 +Train: [15] [5200/6250] eta: 0:02:48 lr: 0.000121 grad: 0.1386 (0.1502) loss: 0.7539 (0.7574) time: 0.1532 data: 0.0670 max mem: 9377 +Train: [15] [5300/6250] eta: 0:02:32 lr: 0.000121 grad: 0.1473 (0.1503) loss: 0.7428 (0.7573) time: 0.1696 data: 0.0873 max mem: 9377 +Train: [15] [5400/6250] eta: 0:02:16 lr: 0.000121 grad: 0.1434 (0.1502) loss: 0.7537 (0.7571) time: 0.1405 data: 0.0513 max mem: 9377 +Train: [15] [5500/6250] eta: 0:02:00 lr: 0.000121 grad: 0.1434 (0.1501) loss: 0.7379 (0.7570) time: 0.1807 data: 0.0961 max mem: 9377 +Train: [15] [5600/6250] eta: 0:01:44 lr: 0.000121 grad: 0.1401 (0.1501) loss: 0.7505 (0.7568) time: 0.1742 data: 0.0865 max mem: 9377 +Train: [15] [5700/6250] eta: 0:01:28 lr: 0.000121 grad: 0.1467 (0.1501) loss: 0.7562 (0.7567) time: 0.1415 data: 0.0549 max mem: 9377 +Train: [15] [5800/6250] eta: 0:01:11 lr: 0.000121 grad: 0.1458 (0.1500) loss: 0.7616 (0.7566) time: 0.1427 data: 0.0532 max mem: 9377 +Train: [15] [5900/6250] eta: 0:00:55 lr: 0.000121 grad: 0.1511 (0.1500) loss: 0.7415 (0.7565) time: 0.1620 data: 0.0758 max mem: 9377 +Train: [15] [6000/6250] eta: 0:00:39 lr: 0.000121 grad: 0.1406 (0.1500) loss: 0.7467 (0.7564) time: 0.1592 data: 0.0719 max mem: 9377 +Train: [15] [6100/6250] eta: 0:00:23 lr: 0.000121 grad: 0.1555 (0.1501) loss: 0.7452 (0.7563) time: 0.1923 data: 0.1091 max mem: 9377 +Train: [15] [6200/6250] eta: 0:00:07 lr: 0.000121 grad: 0.1435 (0.1500) loss: 0.7435 (0.7561) time: 0.1481 data: 0.0628 max mem: 9377 +Train: [15] [6249/6250] eta: 0:00:00 lr: 0.000121 grad: 0.1358 (0.1500) loss: 0.7562 (0.7561) time: 0.1607 data: 0.0740 max mem: 9377 +Train: [15] Total time: 0:16:45 (0.1609 s / it) +Averaged stats: lr: 0.000121 grad: 0.1358 (0.1500) loss: 0.7562 (0.7561) +Eval (hcp-train-subset): [15] [ 0/62] eta: 0:05:48 loss: 0.8601 (0.8601) time: 5.6252 data: 5.5875 max mem: 9377 +Eval (hcp-train-subset): [15] [61/62] eta: 0:00:00 loss: 0.8684 (0.8689) time: 0.1461 data: 0.1210 max mem: 9377 +Eval (hcp-train-subset): [15] Total time: 0:00:15 (0.2478 s / it) +Averaged stats (hcp-train-subset): loss: 0.8684 (0.8689) +Eval (hcp-val): [15] [ 0/62] eta: 0:04:28 loss: 0.8663 (0.8663) time: 4.3374 data: 4.2717 max mem: 9377 +Eval (hcp-val): [15] [61/62] eta: 0:00:00 loss: 0.8662 (0.8673) time: 0.1344 data: 0.1075 max mem: 9377 +Eval (hcp-val): [15] Total time: 0:00:15 (0.2571 s / it) +Averaged stats (hcp-val): loss: 0.8662 (0.8673) +Eval (nsd-val): [15] [ 0/62] eta: 0:06:05 loss: 0.8339 (0.8339) time: 5.8898 data: 5.8572 max mem: 9377 +Eval (nsd-val): [15] [61/62] eta: 0:00:00 loss: 0.8465 (0.8457) time: 0.1426 data: 0.1173 max mem: 9377 +Eval (nsd-val): [15] Total time: 0:00:15 (0.2451 s / it) +Averaged stats (nsd-val): loss: 0.8465 (0.8457) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [16] [ 0/6250] eta: 8:21:46 lr: 0.000121 grad: 0.4550 (0.4550) loss: 0.8000 (0.8000) time: 4.8170 data: 4.5567 max mem: 9377 +Train: [16] [ 100/6250] eta: 0:22:29 lr: 0.000121 grad: 0.2139 (0.2379) loss: 0.7481 (0.7664) time: 0.1683 data: 0.0748 max mem: 9377 +Train: [16] [ 200/6250] eta: 0:20:15 lr: 0.000121 grad: 0.1647 (0.2143) loss: 0.7504 (0.7546) time: 0.1960 data: 0.1129 max mem: 9377 +Train: [16] [ 300/6250] eta: 0:19:14 lr: 0.000121 grad: 0.1651 (0.1979) loss: 0.7636 (0.7569) time: 0.2063 data: 0.1103 max mem: 9377 +Train: [16] [ 400/6250] eta: 0:18:16 lr: 0.000121 grad: 0.1512 (0.1880) loss: 0.7677 (0.7593) time: 0.1612 data: 0.0607 max mem: 9377 +Train: [16] [ 500/6250] eta: 0:17:38 lr: 0.000121 grad: 0.1584 (0.1836) loss: 0.7597 (0.7597) time: 0.1655 data: 0.0610 max mem: 9377 +Train: [16] [ 600/6250] eta: 0:17:05 lr: 0.000121 grad: 0.1741 (0.1827) loss: 0.7541 (0.7589) time: 0.1621 data: 0.0607 max mem: 9377 +Train: [16] [ 700/6250] eta: 0:16:44 lr: 0.000121 grad: 0.1568 (0.1816) loss: 0.7637 (0.7585) time: 0.2598 data: 0.1780 max mem: 9377 +Train: [16] [ 800/6250] eta: 0:16:20 lr: 0.000121 grad: 0.1436 (0.1779) loss: 0.7626 (0.7581) time: 0.1695 data: 0.0857 max mem: 9377 +Train: [16] [ 900/6250] eta: 0:15:56 lr: 0.000121 grad: 0.1576 (0.1753) loss: 0.7502 (0.7581) time: 0.1512 data: 0.0488 max mem: 9377 +Train: [16] [1000/6250] eta: 0:15:25 lr: 0.000121 grad: 0.1460 (0.1729) loss: 0.7505 (0.7581) time: 0.1452 data: 0.0573 max mem: 9377 +Train: [16] [1100/6250] eta: 0:15:07 lr: 0.000121 grad: 0.1390 (0.1701) loss: 0.7730 (0.7586) time: 0.1462 data: 0.0592 max mem: 9377 +Train: [16] [1200/6250] eta: 0:14:44 lr: 0.000121 grad: 0.1440 (0.1676) loss: 0.7618 (0.7589) time: 0.1504 data: 0.0551 max mem: 9377 +Train: [16] [1300/6250] eta: 0:14:19 lr: 0.000121 grad: 0.1425 (0.1657) loss: 0.7413 (0.7587) time: 0.1543 data: 0.0757 max mem: 9377 +Train: [16] [1400/6250] eta: 0:13:52 lr: 0.000121 grad: 0.1367 (0.1644) loss: 0.7502 (0.7584) time: 0.1691 data: 0.0806 max mem: 9377 +Train: [16] [1500/6250] eta: 0:13:29 lr: 0.000121 grad: 0.1403 (0.1630) loss: 0.7405 (0.7580) time: 0.1583 data: 0.0660 max mem: 9377 +Train: [16] [1600/6250] eta: 0:13:05 lr: 0.000121 grad: 0.1360 (0.1618) loss: 0.7727 (0.7579) time: 0.1436 data: 0.0540 max mem: 9377 +Train: [16] [1700/6250] eta: 0:12:43 lr: 0.000121 grad: 0.1460 (0.1605) loss: 0.7380 (0.7577) time: 0.1312 data: 0.0381 max mem: 9377 +Train: [16] [1800/6250] eta: 0:12:21 lr: 0.000121 grad: 0.1404 (0.1597) loss: 0.7485 (0.7575) time: 0.1509 data: 0.0623 max mem: 9377 +Train: [16] [1900/6250] eta: 0:12:01 lr: 0.000121 grad: 0.1473 (0.1592) loss: 0.7529 (0.7574) time: 0.1580 data: 0.0578 max mem: 9377 +Train: [16] [2000/6250] eta: 0:11:44 lr: 0.000121 grad: 0.1484 (0.1587) loss: 0.7687 (0.7572) time: 0.1645 data: 0.0819 max mem: 9377 +Train: [16] [2100/6250] eta: 0:11:26 lr: 0.000121 grad: 0.1368 (0.1580) loss: 0.7506 (0.7569) time: 0.1546 data: 0.0703 max mem: 9377 +Train: [16] [2200/6250] eta: 0:11:10 lr: 0.000121 grad: 0.1425 (0.1575) loss: 0.7623 (0.7567) time: 0.1586 data: 0.0666 max mem: 9377 +Train: [16] [2300/6250] eta: 0:10:51 lr: 0.000121 grad: 0.1356 (0.1567) loss: 0.7472 (0.7566) time: 0.1381 data: 0.0507 max mem: 9377 +Train: [16] [2400/6250] eta: 0:10:33 lr: 0.000121 grad: 0.1341 (0.1562) loss: 0.7627 (0.7563) time: 0.1534 data: 0.0587 max mem: 9377 +Train: [16] [2500/6250] eta: 0:10:16 lr: 0.000121 grad: 0.1409 (0.1556) loss: 0.7410 (0.7561) time: 0.1619 data: 0.0748 max mem: 9377 +Train: [16] [2600/6250] eta: 0:10:00 lr: 0.000121 grad: 0.1452 (0.1553) loss: 0.7487 (0.7559) time: 0.1822 data: 0.0975 max mem: 9377 +Train: [16] [2700/6250] eta: 0:09:41 lr: 0.000121 grad: 0.1463 (0.1551) loss: 0.7461 (0.7556) time: 0.1377 data: 0.0449 max mem: 9377 +Train: [16] [2800/6250] eta: 0:09:23 lr: 0.000121 grad: 0.1475 (0.1547) loss: 0.7370 (0.7554) time: 0.1432 data: 0.0592 max mem: 9377 +Train: [16] [2900/6250] eta: 0:09:06 lr: 0.000121 grad: 0.1432 (0.1543) loss: 0.7434 (0.7552) time: 0.1493 data: 0.0633 max mem: 9377 +Train: [16] [3000/6250] eta: 0:08:49 lr: 0.000121 grad: 0.1366 (0.1542) loss: 0.7436 (0.7549) time: 0.1702 data: 0.0819 max mem: 9377 +Train: [16] [3100/6250] eta: 0:08:32 lr: 0.000121 grad: 0.1552 (0.1541) loss: 0.7455 (0.7545) time: 0.1658 data: 0.0833 max mem: 9377 +Train: [16] [3200/6250] eta: 0:08:15 lr: 0.000121 grad: 0.1527 (0.1539) loss: 0.7440 (0.7543) time: 0.1414 data: 0.0536 max mem: 9377 +Train: [16] [3300/6250] eta: 0:07:58 lr: 0.000121 grad: 0.1414 (0.1537) loss: 0.7479 (0.7540) time: 0.1765 data: 0.0921 max mem: 9377 +Train: [16] [3400/6250] eta: 0:07:42 lr: 0.000121 grad: 0.1467 (0.1537) loss: 0.7466 (0.7537) time: 0.1595 data: 0.0690 max mem: 9377 +Train: [16] [3500/6250] eta: 0:07:25 lr: 0.000120 grad: 0.1401 (0.1534) loss: 0.7527 (0.7536) time: 0.1140 data: 0.0294 max mem: 9377 +Train: [16] [3600/6250] eta: 0:07:08 lr: 0.000120 grad: 0.1380 (0.1534) loss: 0.7457 (0.7533) time: 0.1531 data: 0.0589 max mem: 9377 +Train: [16] [3700/6250] eta: 0:06:51 lr: 0.000120 grad: 0.1476 (0.1533) loss: 0.7409 (0.7531) time: 0.1441 data: 0.0598 max mem: 9377 +Train: [16] [3800/6250] eta: 0:06:34 lr: 0.000120 grad: 0.1517 (0.1532) loss: 0.7288 (0.7528) time: 0.1542 data: 0.0674 max mem: 9377 +Train: [16] [3900/6250] eta: 0:06:17 lr: 0.000120 grad: 0.1389 (0.1530) loss: 0.7539 (0.7527) time: 0.1378 data: 0.0577 max mem: 9377 +Train: [16] [4000/6250] eta: 0:06:01 lr: 0.000120 grad: 0.1428 (0.1528) loss: 0.7582 (0.7525) time: 0.1454 data: 0.0480 max mem: 9377 +Train: [16] [4100/6250] eta: 0:05:44 lr: 0.000120 grad: 0.1548 (0.1527) loss: 0.7349 (0.7523) time: 0.1352 data: 0.0471 max mem: 9377 +Train: [16] [4200/6250] eta: 0:05:28 lr: 0.000120 grad: 0.1407 (0.1525) loss: 0.7459 (0.7519) time: 0.1587 data: 0.0774 max mem: 9377 +Train: [16] [4300/6250] eta: 0:05:12 lr: 0.000120 grad: 0.1351 (0.1525) loss: 0.7446 (0.7517) time: 0.1117 data: 0.0152 max mem: 9377 +Train: [16] [4400/6250] eta: 0:04:56 lr: 0.000120 grad: 0.1349 (0.1523) loss: 0.7401 (0.7515) time: 0.1718 data: 0.0783 max mem: 9377 +Train: [16] [4500/6250] eta: 0:04:39 lr: 0.000120 grad: 0.1438 (0.1522) loss: 0.7380 (0.7514) time: 0.1508 data: 0.0658 max mem: 9377 +Train: [16] [4600/6250] eta: 0:04:23 lr: 0.000120 grad: 0.1487 (0.1521) loss: 0.7395 (0.7512) time: 0.1208 data: 0.0322 max mem: 9377 +Train: [16] [4700/6250] eta: 0:04:07 lr: 0.000120 grad: 0.1467 (0.1520) loss: 0.7484 (0.7511) time: 0.1826 data: 0.0912 max mem: 9377 +Train: [16] [4800/6250] eta: 0:03:51 lr: 0.000120 grad: 0.1330 (0.1519) loss: 0.7492 (0.7510) time: 0.1401 data: 0.0476 max mem: 9377 +Train: [16] [4900/6250] eta: 0:03:35 lr: 0.000120 grad: 0.1535 (0.1518) loss: 0.7234 (0.7508) time: 0.1601 data: 0.0761 max mem: 9377 +Train: [16] [5000/6250] eta: 0:03:19 lr: 0.000120 grad: 0.1389 (0.1517) loss: 0.7464 (0.7507) time: 0.1426 data: 0.0534 max mem: 9377 +Train: [16] [5100/6250] eta: 0:03:03 lr: 0.000120 grad: 0.1465 (0.1517) loss: 0.7332 (0.7504) time: 0.1166 data: 0.0270 max mem: 9377 +Train: [16] [5200/6250] eta: 0:02:47 lr: 0.000120 grad: 0.1526 (0.1517) loss: 0.7462 (0.7503) time: 0.1486 data: 0.0673 max mem: 9377 +Train: [16] [5300/6250] eta: 0:02:31 lr: 0.000120 grad: 0.1405 (0.1516) loss: 0.7430 (0.7502) time: 0.1542 data: 0.0649 max mem: 9377 +Train: [16] [5400/6250] eta: 0:02:15 lr: 0.000120 grad: 0.1376 (0.1515) loss: 0.7475 (0.7501) time: 0.1666 data: 0.0846 max mem: 9377 +Train: [16] [5500/6250] eta: 0:01:59 lr: 0.000120 grad: 0.1520 (0.1514) loss: 0.7507 (0.7501) time: 0.1764 data: 0.0904 max mem: 9377 +Train: [16] [5600/6250] eta: 0:01:43 lr: 0.000120 grad: 0.1379 (0.1513) loss: 0.7534 (0.7500) time: 0.1545 data: 0.0651 max mem: 9377 +Train: [16] [5700/6250] eta: 0:01:27 lr: 0.000120 grad: 0.1353 (0.1511) loss: 0.7542 (0.7500) time: 0.1382 data: 0.0465 max mem: 9377 +Train: [16] [5800/6250] eta: 0:01:11 lr: 0.000120 grad: 0.1510 (0.1510) loss: 0.7524 (0.7499) time: 0.1528 data: 0.0642 max mem: 9377 +Train: [16] [5900/6250] eta: 0:00:55 lr: 0.000120 grad: 0.1501 (0.1509) loss: 0.7478 (0.7499) time: 0.1713 data: 0.0852 max mem: 9377 +Train: [16] [6000/6250] eta: 0:00:39 lr: 0.000120 grad: 0.1465 (0.1508) loss: 0.7406 (0.7497) time: 0.1686 data: 0.0839 max mem: 9377 +Train: [16] [6100/6250] eta: 0:00:23 lr: 0.000120 grad: 0.1388 (0.1508) loss: 0.7370 (0.7496) time: 0.1447 data: 0.0552 max mem: 9377 +Train: [16] [6200/6250] eta: 0:00:07 lr: 0.000120 grad: 0.1451 (0.1508) loss: 0.7438 (0.7495) time: 0.1553 data: 0.0689 max mem: 9377 +Train: [16] [6249/6250] eta: 0:00:00 lr: 0.000120 grad: 0.1423 (0.1508) loss: 0.7331 (0.7495) time: 0.1557 data: 0.0705 max mem: 9377 +Train: [16] Total time: 0:16:43 (0.1605 s / it) +Averaged stats: lr: 0.000120 grad: 0.1423 (0.1508) loss: 0.7331 (0.7495) +Eval (hcp-train-subset): [16] [ 0/62] eta: 0:05:45 loss: 0.8660 (0.8660) time: 5.5805 data: 5.5480 max mem: 9377 +Eval (hcp-train-subset): [16] [61/62] eta: 0:00:00 loss: 0.8688 (0.8702) time: 0.1452 data: 0.1184 max mem: 9377 +Eval (hcp-train-subset): [16] Total time: 0:00:14 (0.2349 s / it) +Averaged stats (hcp-train-subset): loss: 0.8688 (0.8702) +Eval (hcp-val): [16] [ 0/62] eta: 0:03:22 loss: 0.8613 (0.8613) time: 3.2663 data: 3.1785 max mem: 9377 +Eval (hcp-val): [16] [61/62] eta: 0:00:00 loss: 0.8696 (0.8692) time: 0.1396 data: 0.1146 max mem: 9377 +Eval (hcp-val): [16] Total time: 0:00:16 (0.2729 s / it) +Averaged stats (hcp-val): loss: 0.8696 (0.8692) +Eval (nsd-val): [16] [ 0/62] eta: 0:05:34 loss: 0.8403 (0.8403) time: 5.3898 data: 5.3248 max mem: 9377 +Eval (nsd-val): [16] [61/62] eta: 0:00:00 loss: 0.8508 (0.8525) time: 0.1425 data: 0.1171 max mem: 9377 +Eval (nsd-val): [16] Total time: 0:00:14 (0.2374 s / it) +Averaged stats (nsd-val): loss: 0.8508 (0.8525) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [17] [ 0/6250] eta: 10:25:18 lr: 0.000120 grad: 0.1642 (0.1642) loss: 0.7796 (0.7796) time: 6.0029 data: 5.8972 max mem: 9377 +Train: [17] [ 100/6250] eta: 0:21:53 lr: 0.000120 grad: 0.2407 (0.2562) loss: 0.7378 (0.7685) time: 0.1335 data: 0.0394 max mem: 9377 +Train: [17] [ 200/6250] eta: 0:19:30 lr: 0.000120 grad: 0.2139 (0.2551) loss: 0.7762 (0.7689) time: 0.1746 data: 0.0821 max mem: 9377 +Train: [17] [ 300/6250] eta: 0:18:19 lr: 0.000120 grad: 0.1717 (0.2331) loss: 0.7628 (0.7674) time: 0.1624 data: 0.0655 max mem: 9377 +Train: [17] [ 400/6250] eta: 0:17:34 lr: 0.000120 grad: 0.1378 (0.2154) loss: 0.7654 (0.7667) time: 0.1451 data: 0.0572 max mem: 9377 +Train: [17] [ 500/6250] eta: 0:16:47 lr: 0.000120 grad: 0.1581 (0.2047) loss: 0.7632 (0.7658) time: 0.1445 data: 0.0530 max mem: 9377 +Train: [17] [ 600/6250] eta: 0:16:11 lr: 0.000120 grad: 0.1500 (0.1980) loss: 0.7655 (0.7645) time: 0.1705 data: 0.0851 max mem: 9377 +Train: [17] [ 700/6250] eta: 0:15:42 lr: 0.000120 grad: 0.1722 (0.1951) loss: 0.7521 (0.7634) time: 0.1747 data: 0.0959 max mem: 9377 +Train: [17] [ 800/6250] eta: 0:15:23 lr: 0.000120 grad: 0.1437 (0.1907) loss: 0.7587 (0.7625) time: 0.1642 data: 0.0739 max mem: 9377 +Train: [17] [ 900/6250] eta: 0:15:04 lr: 0.000120 grad: 0.1460 (0.1863) loss: 0.7370 (0.7610) time: 0.1941 data: 0.1051 max mem: 9377 +Train: [17] [1000/6250] eta: 0:14:35 lr: 0.000120 grad: 0.1487 (0.1827) loss: 0.7297 (0.7591) time: 0.1308 data: 0.0418 max mem: 9377 +Train: [17] [1100/6250] eta: 0:14:13 lr: 0.000120 grad: 0.1489 (0.1795) loss: 0.7383 (0.7577) time: 0.1509 data: 0.0562 max mem: 9377 +Train: [17] [1200/6250] eta: 0:14:09 lr: 0.000120 grad: 0.1389 (0.1763) loss: 0.7411 (0.7567) time: 0.1957 data: 0.1062 max mem: 9377 +Train: [17] [1300/6250] eta: 0:13:56 lr: 0.000120 grad: 0.1407 (0.1741) loss: 0.7327 (0.7558) time: 0.1755 data: 0.0930 max mem: 9377 +Train: [17] [1400/6250] eta: 0:13:40 lr: 0.000120 grad: 0.1412 (0.1719) loss: 0.7365 (0.7550) time: 0.1785 data: 0.0884 max mem: 9377 +Train: [17] [1500/6250] eta: 0:13:23 lr: 0.000120 grad: 0.1507 (0.1703) loss: 0.7333 (0.7537) time: 0.1782 data: 0.0715 max mem: 9377 +Train: [17] [1600/6250] eta: 0:13:11 lr: 0.000120 grad: 0.1480 (0.1691) loss: 0.7339 (0.7528) time: 0.2202 data: 0.1102 max mem: 9377 +Train: [17] [1700/6250] eta: 0:12:53 lr: 0.000120 grad: 0.1500 (0.1681) loss: 0.7382 (0.7516) time: 0.1698 data: 0.0719 max mem: 9377 +Train: [17] [1800/6250] eta: 0:12:32 lr: 0.000120 grad: 0.1582 (0.1670) loss: 0.7342 (0.7507) time: 0.1710 data: 0.0754 max mem: 9377 +Train: [17] [1900/6250] eta: 0:12:14 lr: 0.000120 grad: 0.1509 (0.1661) loss: 0.7339 (0.7497) time: 0.1560 data: 0.0595 max mem: 9377 +Train: [17] [2000/6250] eta: 0:11:56 lr: 0.000120 grad: 0.1438 (0.1653) loss: 0.7309 (0.7490) time: 0.1536 data: 0.0695 max mem: 9377 +Train: [17] [2100/6250] eta: 0:11:38 lr: 0.000120 grad: 0.1434 (0.1643) loss: 0.7307 (0.7486) time: 0.1523 data: 0.0656 max mem: 9377 +Train: [17] [2200/6250] eta: 0:11:20 lr: 0.000120 grad: 0.1393 (0.1634) loss: 0.7437 (0.7483) time: 0.1725 data: 0.0833 max mem: 9377 +Train: [17] [2300/6250] eta: 0:11:03 lr: 0.000120 grad: 0.1427 (0.1627) loss: 0.7500 (0.7480) time: 0.1563 data: 0.0684 max mem: 9377 +Train: [17] [2400/6250] eta: 0:10:45 lr: 0.000120 grad: 0.1458 (0.1621) loss: 0.7346 (0.7474) time: 0.1598 data: 0.0751 max mem: 9377 +Train: [17] [2500/6250] eta: 0:10:27 lr: 0.000120 grad: 0.1441 (0.1615) loss: 0.7332 (0.7470) time: 0.1493 data: 0.0570 max mem: 9377 +Train: [17] [2600/6250] eta: 0:10:08 lr: 0.000120 grad: 0.1364 (0.1611) loss: 0.7472 (0.7467) time: 0.1464 data: 0.0526 max mem: 9377 +Train: [17] [2700/6250] eta: 0:09:49 lr: 0.000120 grad: 0.1456 (0.1604) loss: 0.7390 (0.7466) time: 0.1571 data: 0.0578 max mem: 9377 +Train: [17] [2800/6250] eta: 0:09:31 lr: 0.000120 grad: 0.1416 (0.1601) loss: 0.7447 (0.7463) time: 0.1585 data: 0.0693 max mem: 9377 +Train: [17] [2900/6250] eta: 0:09:12 lr: 0.000120 grad: 0.1407 (0.1596) loss: 0.7370 (0.7461) time: 0.1408 data: 0.0613 max mem: 9377 +Train: [17] [3000/6250] eta: 0:08:55 lr: 0.000120 grad: 0.1422 (0.1592) loss: 0.7397 (0.7459) time: 0.1536 data: 0.0568 max mem: 9377 +Train: [17] [3100/6250] eta: 0:08:38 lr: 0.000120 grad: 0.1448 (0.1588) loss: 0.7158 (0.7453) time: 0.1584 data: 0.0710 max mem: 9377 +Train: [17] [3200/6250] eta: 0:08:20 lr: 0.000120 grad: 0.1560 (0.1585) loss: 0.7314 (0.7449) time: 0.1755 data: 0.0930 max mem: 9377 +Train: [17] [3300/6250] eta: 0:08:04 lr: 0.000120 grad: 0.1515 (0.1583) loss: 0.7232 (0.7443) time: 0.1736 data: 0.0830 max mem: 9377 +Train: [17] [3400/6250] eta: 0:07:47 lr: 0.000120 grad: 0.1471 (0.1581) loss: 0.7388 (0.7439) time: 0.1780 data: 0.0904 max mem: 9377 +Train: [17] [3500/6250] eta: 0:07:30 lr: 0.000120 grad: 0.1667 (0.1580) loss: 0.7287 (0.7435) time: 0.1590 data: 0.0755 max mem: 9377 +Train: [17] [3600/6250] eta: 0:07:13 lr: 0.000120 grad: 0.1530 (0.1579) loss: 0.7279 (0.7431) time: 0.1793 data: 0.0952 max mem: 9377 +Train: [17] [3700/6250] eta: 0:06:55 lr: 0.000120 grad: 0.1605 (0.1578) loss: 0.7359 (0.7428) time: 0.1365 data: 0.0402 max mem: 9377 +Train: [17] [3800/6250] eta: 0:06:38 lr: 0.000120 grad: 0.1569 (0.1578) loss: 0.7526 (0.7425) time: 0.1434 data: 0.0476 max mem: 9377 +Train: [17] [3900/6250] eta: 0:06:21 lr: 0.000120 grad: 0.1501 (0.1577) loss: 0.7187 (0.7421) time: 0.1449 data: 0.0567 max mem: 9377 +Train: [17] [4000/6250] eta: 0:06:05 lr: 0.000120 grad: 0.1433 (0.1575) loss: 0.7376 (0.7418) time: 0.1681 data: 0.0801 max mem: 9377 +Train: [17] [4100/6250] eta: 0:05:48 lr: 0.000120 grad: 0.1429 (0.1574) loss: 0.7369 (0.7415) time: 0.1732 data: 0.0866 max mem: 9377 +Train: [17] [4200/6250] eta: 0:05:31 lr: 0.000120 grad: 0.1462 (0.1572) loss: 0.7365 (0.7414) time: 0.1497 data: 0.0542 max mem: 9377 +Train: [17] [4300/6250] eta: 0:05:14 lr: 0.000120 grad: 0.1441 (0.1570) loss: 0.7324 (0.7413) time: 0.1444 data: 0.0566 max mem: 9377 +Train: [17] [4400/6250] eta: 0:04:58 lr: 0.000120 grad: 0.1379 (0.1567) loss: 0.7578 (0.7414) time: 0.1724 data: 0.0913 max mem: 9377 +Train: [17] [4500/6250] eta: 0:04:41 lr: 0.000120 grad: 0.1505 (0.1566) loss: 0.7379 (0.7413) time: 0.1596 data: 0.0742 max mem: 9377 +Train: [17] [4600/6250] eta: 0:04:25 lr: 0.000120 grad: 0.1429 (0.1564) loss: 0.7507 (0.7414) time: 0.1642 data: 0.0747 max mem: 9377 +Train: [17] [4700/6250] eta: 0:04:09 lr: 0.000120 grad: 0.1476 (0.1562) loss: 0.7353 (0.7415) time: 0.1735 data: 0.0810 max mem: 9377 +Train: [17] [4800/6250] eta: 0:03:53 lr: 0.000120 grad: 0.1383 (0.1560) loss: 0.7360 (0.7415) time: 0.1750 data: 0.0862 max mem: 9377 +Train: [17] [4900/6250] eta: 0:03:37 lr: 0.000119 grad: 0.1433 (0.1558) loss: 0.7387 (0.7414) time: 0.1642 data: 0.0740 max mem: 9377 +Train: [17] [5000/6250] eta: 0:03:21 lr: 0.000119 grad: 0.1611 (0.1558) loss: 0.7315 (0.7413) time: 0.1474 data: 0.0534 max mem: 9377 +Train: [17] [5100/6250] eta: 0:03:05 lr: 0.000119 grad: 0.1459 (0.1556) loss: 0.7192 (0.7412) time: 0.1552 data: 0.0626 max mem: 9377 +Train: [17] [5200/6250] eta: 0:02:49 lr: 0.000119 grad: 0.1427 (0.1554) loss: 0.7396 (0.7411) time: 0.1515 data: 0.0566 max mem: 9377 +Train: [17] [5300/6250] eta: 0:02:33 lr: 0.000119 grad: 0.1442 (0.1553) loss: 0.7466 (0.7410) time: 0.1727 data: 0.0873 max mem: 9377 +Train: [17] [5400/6250] eta: 0:02:17 lr: 0.000119 grad: 0.1508 (0.1552) loss: 0.7289 (0.7408) time: 0.1761 data: 0.0915 max mem: 9377 +Train: [17] [5500/6250] eta: 0:02:00 lr: 0.000119 grad: 0.1527 (0.1552) loss: 0.7359 (0.7407) time: 0.1787 data: 0.0896 max mem: 9377 +Train: [17] [5600/6250] eta: 0:01:44 lr: 0.000119 grad: 0.1436 (0.1552) loss: 0.7374 (0.7406) time: 0.1537 data: 0.0664 max mem: 9377 +Train: [17] [5700/6250] eta: 0:01:28 lr: 0.000119 grad: 0.1489 (0.1552) loss: 0.7259 (0.7404) time: 0.1600 data: 0.0698 max mem: 9377 +Train: [17] [5800/6250] eta: 0:01:12 lr: 0.000119 grad: 0.1471 (0.1552) loss: 0.7371 (0.7404) time: 0.1561 data: 0.0698 max mem: 9377 +Train: [17] [5900/6250] eta: 0:00:56 lr: 0.000119 grad: 0.1514 (0.1551) loss: 0.7404 (0.7403) time: 0.1628 data: 0.0757 max mem: 9377 +Train: [17] [6000/6250] eta: 0:00:40 lr: 0.000119 grad: 0.1402 (0.1549) loss: 0.7324 (0.7403) time: 0.1470 data: 0.0501 max mem: 9377 +Train: [17] [6100/6250] eta: 0:00:24 lr: 0.000119 grad: 0.1514 (0.1549) loss: 0.7356 (0.7402) time: 0.1407 data: 0.0596 max mem: 9377 +Train: [17] [6200/6250] eta: 0:00:08 lr: 0.000119 grad: 0.1400 (0.1549) loss: 0.7294 (0.7402) time: 0.1645 data: 0.0811 max mem: 9377 +Train: [17] [6249/6250] eta: 0:00:00 lr: 0.000119 grad: 0.1490 (0.1549) loss: 0.7396 (0.7402) time: 0.1704 data: 0.0862 max mem: 9377 +Train: [17] Total time: 0:16:51 (0.1618 s / it) +Averaged stats: lr: 0.000119 grad: 0.1490 (0.1549) loss: 0.7396 (0.7402) +Eval (hcp-train-subset): [17] [ 0/62] eta: 0:03:59 loss: 0.8661 (0.8661) time: 3.8623 data: 3.7909 max mem: 9377 +Eval (hcp-train-subset): [17] [61/62] eta: 0:00:00 loss: 0.8715 (0.8717) time: 0.1477 data: 0.1226 max mem: 9377 +Eval (hcp-train-subset): [17] Total time: 0:00:15 (0.2492 s / it) +Averaged stats (hcp-train-subset): loss: 0.8715 (0.8717) +Eval (hcp-val): [17] [ 0/62] eta: 0:04:31 loss: 0.8627 (0.8627) time: 4.3833 data: 4.3011 max mem: 9377 +Eval (hcp-val): [17] [61/62] eta: 0:00:00 loss: 0.8706 (0.8705) time: 0.1787 data: 0.1540 max mem: 9377 +Eval (hcp-val): [17] Total time: 0:00:16 (0.2600 s / it) +Averaged stats (hcp-val): loss: 0.8706 (0.8705) +Eval (nsd-val): [17] [ 0/62] eta: 0:05:20 loss: 0.8408 (0.8408) time: 5.1615 data: 5.1297 max mem: 9377 +Eval (nsd-val): [17] [61/62] eta: 0:00:00 loss: 0.8480 (0.8517) time: 0.1205 data: 0.0957 max mem: 9377 +Eval (nsd-val): [17] Total time: 0:00:14 (0.2317 s / it) +Averaged stats (nsd-val): loss: 0.8480 (0.8517) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [18] [ 0/6250] eta: 6:58:03 lr: 0.000119 grad: 0.1066 (0.1066) loss: 0.8409 (0.8409) time: 4.0133 data: 3.6910 max mem: 9377 +Train: [18] [ 100/6250] eta: 0:21:29 lr: 0.000119 grad: 0.2960 (0.3026) loss: 0.7429 (0.7700) time: 0.1446 data: 0.0549 max mem: 9377 +Train: [18] [ 200/6250] eta: 0:18:39 lr: 0.000119 grad: 0.2465 (0.2862) loss: 0.7642 (0.7622) time: 0.2040 data: 0.1138 max mem: 9377 +Train: [18] [ 300/6250] eta: 0:17:57 lr: 0.000119 grad: 0.2044 (0.2690) loss: 0.7660 (0.7606) time: 0.1773 data: 0.0871 max mem: 9377 +Train: [18] [ 400/6250] eta: 0:17:04 lr: 0.000119 grad: 0.2095 (0.2498) loss: 0.7234 (0.7555) time: 0.1382 data: 0.0335 max mem: 9377 +Train: [18] [ 500/6250] eta: 0:16:39 lr: 0.000119 grad: 0.1984 (0.2439) loss: 0.7458 (0.7523) time: 0.1629 data: 0.0757 max mem: 9377 +Train: [18] [ 600/6250] eta: 0:16:01 lr: 0.000119 grad: 0.1757 (0.2369) loss: 0.7492 (0.7508) time: 0.1409 data: 0.0289 max mem: 9377 +Train: [18] [ 700/6250] eta: 0:15:40 lr: 0.000119 grad: 0.1622 (0.2288) loss: 0.7395 (0.7485) time: 0.1888 data: 0.0913 max mem: 9377 +Train: [18] [ 800/6250] eta: 0:15:47 lr: 0.000119 grad: 0.1494 (0.2194) loss: 0.7317 (0.7472) time: 0.2174 data: 0.1266 max mem: 9377 +Train: [18] [ 900/6250] eta: 0:15:37 lr: 0.000119 grad: 0.1437 (0.2114) loss: 0.7363 (0.7465) time: 0.1669 data: 0.0688 max mem: 9377 +Train: [18] [1000/6250] eta: 0:15:17 lr: 0.000119 grad: 0.1489 (0.2053) loss: 0.7291 (0.7457) time: 0.1461 data: 0.0601 max mem: 9377 +Train: [18] [1100/6250] eta: 0:14:48 lr: 0.000119 grad: 0.1412 (0.1997) loss: 0.7263 (0.7451) time: 0.1528 data: 0.0574 max mem: 9377 +Train: [18] [1200/6250] eta: 0:14:30 lr: 0.000119 grad: 0.1530 (0.1955) loss: 0.7386 (0.7446) time: 0.1933 data: 0.0997 max mem: 9377 +Train: [18] [1300/6250] eta: 0:14:18 lr: 0.000119 grad: 0.1462 (0.1919) loss: 0.7412 (0.7445) time: 0.1727 data: 0.0882 max mem: 9377 +Train: [18] [1400/6250] eta: 0:13:59 lr: 0.000119 grad: 0.1609 (0.1893) loss: 0.7330 (0.7441) time: 0.1612 data: 0.0766 max mem: 9377 +Train: [18] [1500/6250] eta: 0:13:42 lr: 0.000119 grad: 0.1601 (0.1870) loss: 0.7351 (0.7436) time: 0.1615 data: 0.0759 max mem: 9377 +Train: [18] [1600/6250] eta: 0:13:22 lr: 0.000119 grad: 0.1487 (0.1846) loss: 0.7211 (0.7431) time: 0.1556 data: 0.0485 max mem: 9377 +Train: [18] [1700/6250] eta: 0:13:03 lr: 0.000119 grad: 0.1466 (0.1824) loss: 0.7372 (0.7427) time: 0.1781 data: 0.0744 max mem: 9377 +Train: [18] [1800/6250] eta: 0:12:43 lr: 0.000119 grad: 0.1424 (0.1804) loss: 0.7312 (0.7423) time: 0.1578 data: 0.0697 max mem: 9377 +Train: [18] [1900/6250] eta: 0:12:23 lr: 0.000119 grad: 0.1462 (0.1787) loss: 0.7320 (0.7420) time: 0.1681 data: 0.0791 max mem: 9377 +Train: [18] [2000/6250] eta: 0:12:04 lr: 0.000119 grad: 0.1445 (0.1770) loss: 0.7356 (0.7419) time: 0.1118 data: 0.0177 max mem: 9377 +Train: [18] [2100/6250] eta: 0:11:46 lr: 0.000119 grad: 0.1407 (0.1756) loss: 0.7367 (0.7417) time: 0.1926 data: 0.1064 max mem: 9377 +Train: [18] [2200/6250] eta: 0:11:25 lr: 0.000119 grad: 0.1380 (0.1743) loss: 0.7438 (0.7417) time: 0.1659 data: 0.0859 max mem: 9377 +Train: [18] [2300/6250] eta: 0:11:07 lr: 0.000119 grad: 0.1414 (0.1732) loss: 0.7420 (0.7416) time: 0.1359 data: 0.0475 max mem: 9377 +Train: [18] [2400/6250] eta: 0:10:49 lr: 0.000119 grad: 0.1338 (0.1720) loss: 0.7475 (0.7416) time: 0.1731 data: 0.0783 max mem: 9377 +Train: [18] [2500/6250] eta: 0:10:31 lr: 0.000119 grad: 0.1405 (0.1709) loss: 0.7347 (0.7415) time: 0.1536 data: 0.0714 max mem: 9377 +Train: [18] [2600/6250] eta: 0:10:14 lr: 0.000119 grad: 0.1348 (0.1700) loss: 0.7502 (0.7415) time: 0.1618 data: 0.0825 max mem: 9377 +Train: [18] [2700/6250] eta: 0:09:57 lr: 0.000119 grad: 0.1438 (0.1691) loss: 0.7437 (0.7416) time: 0.1911 data: 0.1143 max mem: 9377 +Train: [18] [2800/6250] eta: 0:09:40 lr: 0.000119 grad: 0.1393 (0.1682) loss: 0.7444 (0.7417) time: 0.2122 data: 0.1231 max mem: 9377 +Train: [18] [2900/6250] eta: 0:09:20 lr: 0.000119 grad: 0.1376 (0.1677) loss: 0.7387 (0.7416) time: 0.1475 data: 0.0671 max mem: 9377 +Train: [18] [3000/6250] eta: 0:09:01 lr: 0.000119 grad: 0.1420 (0.1672) loss: 0.7445 (0.7415) time: 0.1496 data: 0.0584 max mem: 9377 +Train: [18] [3100/6250] eta: 0:08:43 lr: 0.000119 grad: 0.1448 (0.1665) loss: 0.7298 (0.7414) time: 0.1133 data: 0.0253 max mem: 9377 +Train: [18] [3200/6250] eta: 0:08:25 lr: 0.000119 grad: 0.1538 (0.1660) loss: 0.7066 (0.7411) time: 0.1721 data: 0.0808 max mem: 9377 +Train: [18] [3300/6250] eta: 0:08:07 lr: 0.000119 grad: 0.1461 (0.1655) loss: 0.7435 (0.7410) time: 0.1547 data: 0.0706 max mem: 9377 +Train: [18] [3400/6250] eta: 0:07:49 lr: 0.000119 grad: 0.1353 (0.1650) loss: 0.7376 (0.7408) time: 0.1834 data: 0.0951 max mem: 9377 +Train: [18] [3500/6250] eta: 0:07:32 lr: 0.000119 grad: 0.1498 (0.1645) loss: 0.7378 (0.7407) time: 0.1551 data: 0.0651 max mem: 9377 +Train: [18] [3600/6250] eta: 0:07:15 lr: 0.000119 grad: 0.1508 (0.1640) loss: 0.7480 (0.7406) time: 0.1133 data: 0.0150 max mem: 9377 +Train: [18] [3700/6250] eta: 0:06:58 lr: 0.000119 grad: 0.1321 (0.1636) loss: 0.7350 (0.7405) time: 0.1670 data: 0.0754 max mem: 9377 +Train: [18] [3800/6250] eta: 0:06:40 lr: 0.000119 grad: 0.1468 (0.1631) loss: 0.7351 (0.7406) time: 0.1477 data: 0.0526 max mem: 9377 +Train: [18] [3900/6250] eta: 0:06:23 lr: 0.000119 grad: 0.1518 (0.1627) loss: 0.7398 (0.7405) time: 0.1495 data: 0.0606 max mem: 9377 +Train: [18] [4000/6250] eta: 0:06:06 lr: 0.000119 grad: 0.1423 (0.1623) loss: 0.7315 (0.7405) time: 0.1468 data: 0.0629 max mem: 9377 +Train: [18] [4100/6250] eta: 0:05:49 lr: 0.000119 grad: 0.1349 (0.1618) loss: 0.7404 (0.7405) time: 0.1559 data: 0.0668 max mem: 9377 +Train: [18] [4200/6250] eta: 0:05:32 lr: 0.000119 grad: 0.1502 (0.1615) loss: 0.7211 (0.7403) time: 0.1526 data: 0.0680 max mem: 9377 +Train: [18] [4300/6250] eta: 0:05:16 lr: 0.000119 grad: 0.1436 (0.1611) loss: 0.7348 (0.7402) time: 0.1998 data: 0.1072 max mem: 9377 +Train: [18] [4400/6250] eta: 0:05:00 lr: 0.000119 grad: 0.1417 (0.1608) loss: 0.7317 (0.7401) time: 0.1394 data: 0.0528 max mem: 9377 +Train: [18] [4500/6250] eta: 0:04:44 lr: 0.000119 grad: 0.1505 (0.1605) loss: 0.7346 (0.7401) time: 0.1581 data: 0.0656 max mem: 9377 +Train: [18] [4600/6250] eta: 0:04:27 lr: 0.000119 grad: 0.1379 (0.1602) loss: 0.7452 (0.7401) time: 0.1555 data: 0.0701 max mem: 9377 +Train: [18] [4700/6250] eta: 0:04:11 lr: 0.000119 grad: 0.1499 (0.1600) loss: 0.7228 (0.7401) time: 0.1497 data: 0.0567 max mem: 9377 +Train: [18] [4800/6250] eta: 0:03:55 lr: 0.000119 grad: 0.1444 (0.1598) loss: 0.7417 (0.7401) time: 0.1646 data: 0.0798 max mem: 9377 +Train: [18] [4900/6250] eta: 0:03:38 lr: 0.000119 grad: 0.1501 (0.1595) loss: 0.7429 (0.7401) time: 0.1535 data: 0.0678 max mem: 9377 +Train: [18] [5000/6250] eta: 0:03:22 lr: 0.000119 grad: 0.1466 (0.1592) loss: 0.7503 (0.7401) time: 0.1413 data: 0.0527 max mem: 9377 +Train: [18] [5100/6250] eta: 0:03:06 lr: 0.000119 grad: 0.1416 (0.1589) loss: 0.7304 (0.7401) time: 0.1565 data: 0.0638 max mem: 9377 +Train: [18] [5200/6250] eta: 0:02:49 lr: 0.000119 grad: 0.1456 (0.1586) loss: 0.7369 (0.7401) time: 0.1456 data: 0.0602 max mem: 9377 +Train: [18] [5300/6250] eta: 0:02:33 lr: 0.000119 grad: 0.1400 (0.1584) loss: 0.7299 (0.7401) time: 0.1724 data: 0.0871 max mem: 9377 +Train: [18] [5400/6250] eta: 0:02:17 lr: 0.000119 grad: 0.1376 (0.1582) loss: 0.7356 (0.7400) time: 0.1552 data: 0.0533 max mem: 9377 +Train: [18] [5500/6250] eta: 0:02:00 lr: 0.000119 grad: 0.1513 (0.1580) loss: 0.7319 (0.7400) time: 0.1571 data: 0.0705 max mem: 9377 +Train: [18] [5600/6250] eta: 0:01:44 lr: 0.000119 grad: 0.1400 (0.1579) loss: 0.7397 (0.7400) time: 0.1515 data: 0.0643 max mem: 9377 +Train: [18] [5700/6250] eta: 0:01:28 lr: 0.000119 grad: 0.1479 (0.1577) loss: 0.7335 (0.7400) time: 0.1146 data: 0.0228 max mem: 9377 +Train: [18] [5800/6250] eta: 0:01:12 lr: 0.000118 grad: 0.1501 (0.1576) loss: 0.7384 (0.7401) time: 0.1665 data: 0.0738 max mem: 9377 +Train: [18] [5900/6250] eta: 0:00:56 lr: 0.000118 grad: 0.1566 (0.1575) loss: 0.7341 (0.7399) time: 0.1603 data: 0.0812 max mem: 9377 +Train: [18] [6000/6250] eta: 0:00:40 lr: 0.000118 grad: 0.1395 (0.1573) loss: 0.7400 (0.7400) time: 0.1553 data: 0.0659 max mem: 9377 +Train: [18] [6100/6250] eta: 0:00:24 lr: 0.000118 grad: 0.1492 (0.1572) loss: 0.7261 (0.7399) time: 0.1615 data: 0.0762 max mem: 9377 +Train: [18] [6200/6250] eta: 0:00:08 lr: 0.000118 grad: 0.1404 (0.1571) loss: 0.7499 (0.7399) time: 0.1564 data: 0.0729 max mem: 9377 +Train: [18] [6249/6250] eta: 0:00:00 lr: 0.000118 grad: 0.1486 (0.1571) loss: 0.7379 (0.7399) time: 0.1513 data: 0.0589 max mem: 9377 +Train: [18] Total time: 0:16:50 (0.1616 s / it) +Averaged stats: lr: 0.000118 grad: 0.1486 (0.1571) loss: 0.7379 (0.7399) +Eval (hcp-train-subset): [18] [ 0/62] eta: 0:04:39 loss: 0.8614 (0.8614) time: 4.5098 data: 4.4671 max mem: 9377 +Eval (hcp-train-subset): [18] [61/62] eta: 0:00:00 loss: 0.8730 (0.8724) time: 0.1176 data: 0.0922 max mem: 9377 +Eval (hcp-train-subset): [18] Total time: 0:00:14 (0.2329 s / it) +Averaged stats (hcp-train-subset): loss: 0.8730 (0.8724) +Eval (hcp-val): [18] [ 0/62] eta: 0:04:46 loss: 0.8735 (0.8735) time: 4.6231 data: 4.5543 max mem: 9377 +Eval (hcp-val): [18] [61/62] eta: 0:00:00 loss: 0.8697 (0.8723) time: 0.1332 data: 0.1063 max mem: 9377 +Eval (hcp-val): [18] Total time: 0:00:14 (0.2316 s / it) +Averaged stats (hcp-val): loss: 0.8697 (0.8723) +Eval (nsd-val): [18] [ 0/62] eta: 0:04:10 loss: 0.8456 (0.8456) time: 4.0377 data: 3.9663 max mem: 9377 +Eval (nsd-val): [18] [61/62] eta: 0:00:00 loss: 0.8521 (0.8548) time: 0.1909 data: 0.1660 max mem: 9377 +Eval (nsd-val): [18] Total time: 0:00:15 (0.2503 s / it) +Averaged stats (nsd-val): loss: 0.8521 (0.8548) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [19] [ 0/6250] eta: 10:13:30 lr: 0.000118 grad: 0.1699 (0.1699) loss: 0.7743 (0.7743) time: 5.8896 data: 5.7940 max mem: 9377 +Train: [19] [ 100/6250] eta: 0:21:56 lr: 0.000118 grad: 0.2842 (0.2687) loss: 0.7482 (0.7749) time: 0.1807 data: 0.1022 max mem: 9377 +Train: [19] [ 200/6250] eta: 0:19:04 lr: 0.000118 grad: 0.2521 (0.2610) loss: 0.7256 (0.7618) time: 0.1431 data: 0.0592 max mem: 9377 +Train: [19] [ 300/6250] eta: 0:17:48 lr: 0.000118 grad: 0.1924 (0.2659) loss: 0.7644 (0.7573) time: 0.1630 data: 0.0644 max mem: 9377 +Train: [19] [ 400/6250] eta: 0:17:12 lr: 0.000118 grad: 0.1681 (0.2447) loss: 0.7404 (0.7528) time: 0.1495 data: 0.0506 max mem: 9377 +Train: [19] [ 500/6250] eta: 0:16:48 lr: 0.000118 grad: 0.1799 (0.2307) loss: 0.7470 (0.7518) time: 0.1799 data: 0.0888 max mem: 9377 +Train: [19] [ 600/6250] eta: 0:16:33 lr: 0.000118 grad: 0.1867 (0.2273) loss: 0.7365 (0.7496) time: 0.1896 data: 0.0860 max mem: 9377 +Train: [19] [ 700/6250] eta: 0:16:20 lr: 0.000118 grad: 0.1640 (0.2213) loss: 0.7214 (0.7473) time: 0.1862 data: 0.0939 max mem: 9377 +Train: [19] [ 800/6250] eta: 0:16:02 lr: 0.000118 grad: 0.1520 (0.2132) loss: 0.7433 (0.7454) time: 0.1481 data: 0.0698 max mem: 9377 +Train: [19] [ 900/6250] eta: 0:15:46 lr: 0.000118 grad: 0.1473 (0.2064) loss: 0.7199 (0.7443) time: 0.1837 data: 0.0925 max mem: 9377 +Train: [19] [1000/6250] eta: 0:15:28 lr: 0.000118 grad: 0.1414 (0.2003) loss: 0.7545 (0.7436) time: 0.1942 data: 0.0981 max mem: 9377 +Train: [19] [1100/6250] eta: 0:15:06 lr: 0.000118 grad: 0.1484 (0.1957) loss: 0.7264 (0.7431) time: 0.1592 data: 0.0753 max mem: 9377 +Train: [19] [1200/6250] eta: 0:14:46 lr: 0.000118 grad: 0.1415 (0.1920) loss: 0.7161 (0.7420) time: 0.1671 data: 0.0666 max mem: 9377 +Train: [19] [1300/6250] eta: 0:14:27 lr: 0.000118 grad: 0.1482 (0.1885) loss: 0.7222 (0.7414) time: 0.1498 data: 0.0462 max mem: 9377 +Train: [19] [1400/6250] eta: 0:14:13 lr: 0.000118 grad: 0.1369 (0.1856) loss: 0.7333 (0.7409) time: 0.1761 data: 0.0933 max mem: 9377 +Train: [19] [1500/6250] eta: 0:13:54 lr: 0.000118 grad: 0.1431 (0.1829) loss: 0.7201 (0.7402) time: 0.1735 data: 0.0770 max mem: 9377 +Train: [19] [1600/6250] eta: 0:13:33 lr: 0.000118 grad: 0.1412 (0.1809) loss: 0.7105 (0.7395) time: 0.1529 data: 0.0674 max mem: 9377 +Train: [19] [1700/6250] eta: 0:13:11 lr: 0.000118 grad: 0.1397 (0.1790) loss: 0.7315 (0.7394) time: 0.1786 data: 0.0950 max mem: 9377 +Train: [19] [1800/6250] eta: 0:12:51 lr: 0.000118 grad: 0.1394 (0.1771) loss: 0.7446 (0.7394) time: 0.1618 data: 0.0722 max mem: 9377 +Train: [19] [1900/6250] eta: 0:12:30 lr: 0.000118 grad: 0.1361 (0.1755) loss: 0.7367 (0.7392) time: 0.1458 data: 0.0565 max mem: 9377 +Train: [19] [2000/6250] eta: 0:12:09 lr: 0.000118 grad: 0.1544 (0.1743) loss: 0.7309 (0.7390) time: 0.1612 data: 0.0651 max mem: 9377 +Train: [19] [2100/6250] eta: 0:11:47 lr: 0.000118 grad: 0.1460 (0.1728) loss: 0.7359 (0.7390) time: 0.1330 data: 0.0448 max mem: 9377 +Train: [19] [2200/6250] eta: 0:11:27 lr: 0.000118 grad: 0.1472 (0.1718) loss: 0.7483 (0.7390) time: 0.1574 data: 0.0625 max mem: 9377 +Train: [19] [2300/6250] eta: 0:11:07 lr: 0.000118 grad: 0.1609 (0.1708) loss: 0.7289 (0.7390) time: 0.1760 data: 0.0884 max mem: 9377 +Train: [19] [2400/6250] eta: 0:10:48 lr: 0.000118 grad: 0.1444 (0.1697) loss: 0.7477 (0.7391) time: 0.1191 data: 0.0230 max mem: 9377 +Train: [19] [2500/6250] eta: 0:10:30 lr: 0.000118 grad: 0.1455 (0.1688) loss: 0.7408 (0.7393) time: 0.1652 data: 0.0712 max mem: 9377 +Train: [19] [2600/6250] eta: 0:10:11 lr: 0.000118 grad: 0.1455 (0.1680) loss: 0.7356 (0.7392) time: 0.1537 data: 0.0638 max mem: 9377 +Train: [19] [2700/6250] eta: 0:09:52 lr: 0.000118 grad: 0.1563 (0.1674) loss: 0.7328 (0.7392) time: 0.1483 data: 0.0627 max mem: 9377 +Train: [19] [2800/6250] eta: 0:09:35 lr: 0.000118 grad: 0.1402 (0.1668) loss: 0.7412 (0.7391) time: 0.1522 data: 0.0657 max mem: 9377 +Train: [19] [2900/6250] eta: 0:09:16 lr: 0.000118 grad: 0.1410 (0.1664) loss: 0.7415 (0.7390) time: 0.1462 data: 0.0570 max mem: 9377 +Train: [19] [3000/6250] eta: 0:08:58 lr: 0.000118 grad: 0.1496 (0.1659) loss: 0.7343 (0.7389) time: 0.1268 data: 0.0393 max mem: 9377 +Train: [19] [3100/6250] eta: 0:08:41 lr: 0.000118 grad: 0.1455 (0.1654) loss: 0.7337 (0.7388) time: 0.1588 data: 0.0707 max mem: 9377 +Train: [19] [3200/6250] eta: 0:08:23 lr: 0.000118 grad: 0.1452 (0.1649) loss: 0.7444 (0.7388) time: 0.1487 data: 0.0607 max mem: 9377 +Train: [19] [3300/6250] eta: 0:08:05 lr: 0.000118 grad: 0.1477 (0.1644) loss: 0.7401 (0.7387) time: 0.1368 data: 0.0425 max mem: 9377 +Train: [19] [3400/6250] eta: 0:07:49 lr: 0.000118 grad: 0.1398 (0.1640) loss: 0.7487 (0.7387) time: 0.1637 data: 0.0731 max mem: 9377 +Train: [19] [3500/6250] eta: 0:07:33 lr: 0.000118 grad: 0.1489 (0.1634) loss: 0.7302 (0.7386) time: 0.1816 data: 0.0857 max mem: 9377 +Train: [19] [3600/6250] eta: 0:07:16 lr: 0.000118 grad: 0.1488 (0.1630) loss: 0.7325 (0.7386) time: 0.1671 data: 0.0821 max mem: 9377 +Train: [19] [3700/6250] eta: 0:06:59 lr: 0.000118 grad: 0.1463 (0.1626) loss: 0.7343 (0.7386) time: 0.1518 data: 0.0628 max mem: 9377 +Train: [19] [3800/6250] eta: 0:06:42 lr: 0.000118 grad: 0.1416 (0.1622) loss: 0.7437 (0.7384) time: 0.1530 data: 0.0565 max mem: 9377 +Train: [19] [3900/6250] eta: 0:06:26 lr: 0.000118 grad: 0.1400 (0.1619) loss: 0.7339 (0.7383) time: 0.1602 data: 0.0668 max mem: 9377 +Train: [19] [4000/6250] eta: 0:06:10 lr: 0.000118 grad: 0.1481 (0.1615) loss: 0.7403 (0.7383) time: 0.1556 data: 0.0701 max mem: 9377 +Train: [19] [4100/6250] eta: 0:05:53 lr: 0.000118 grad: 0.1518 (0.1612) loss: 0.7310 (0.7381) time: 0.1681 data: 0.0763 max mem: 9377 +Train: [19] [4200/6250] eta: 0:05:36 lr: 0.000118 grad: 0.1481 (0.1609) loss: 0.7342 (0.7381) time: 0.1578 data: 0.0652 max mem: 9377 +Train: [19] [4300/6250] eta: 0:05:20 lr: 0.000118 grad: 0.1486 (0.1606) loss: 0.7307 (0.7380) time: 0.1619 data: 0.0733 max mem: 9377 +Train: [19] [4400/6250] eta: 0:05:04 lr: 0.000118 grad: 0.1425 (0.1603) loss: 0.7409 (0.7379) time: 0.1564 data: 0.0590 max mem: 9377 +Train: [19] [4500/6250] eta: 0:04:47 lr: 0.000118 grad: 0.1462 (0.1602) loss: 0.7383 (0.7380) time: 0.1424 data: 0.0524 max mem: 9377 +Train: [19] [4600/6250] eta: 0:04:30 lr: 0.000118 grad: 0.1422 (0.1598) loss: 0.7379 (0.7380) time: 0.1527 data: 0.0573 max mem: 9377 +Train: [19] [4700/6250] eta: 0:04:14 lr: 0.000118 grad: 0.1382 (0.1596) loss: 0.7302 (0.7381) time: 0.1321 data: 0.0434 max mem: 9377 +Train: [19] [4800/6250] eta: 0:03:57 lr: 0.000118 grad: 0.1435 (0.1593) loss: 0.7327 (0.7381) time: 0.1535 data: 0.0625 max mem: 9377 +Train: [19] [4900/6250] eta: 0:03:41 lr: 0.000118 grad: 0.1444 (0.1590) loss: 0.7231 (0.7381) time: 0.1291 data: 0.0442 max mem: 9377 +Train: [19] [5000/6250] eta: 0:03:24 lr: 0.000118 grad: 0.1399 (0.1588) loss: 0.7236 (0.7381) time: 0.1662 data: 0.0835 max mem: 9377 +Train: [19] [5100/6250] eta: 0:03:08 lr: 0.000118 grad: 0.1496 (0.1586) loss: 0.7285 (0.7381) time: 0.1368 data: 0.0445 max mem: 9377 +Train: [19] [5200/6250] eta: 0:02:51 lr: 0.000118 grad: 0.1477 (0.1584) loss: 0.7307 (0.7380) time: 0.1736 data: 0.0844 max mem: 9377 +Train: [19] [5300/6250] eta: 0:02:35 lr: 0.000118 grad: 0.1462 (0.1582) loss: 0.7311 (0.7379) time: 0.1671 data: 0.0759 max mem: 9377 +Train: [19] [5400/6250] eta: 0:02:18 lr: 0.000118 grad: 0.1405 (0.1580) loss: 0.7406 (0.7378) time: 0.1731 data: 0.0858 max mem: 9377 +Train: [19] [5500/6250] eta: 0:02:02 lr: 0.000118 grad: 0.1391 (0.1579) loss: 0.7265 (0.7376) time: 0.1614 data: 0.0740 max mem: 9377 +Train: [19] [5600/6250] eta: 0:01:46 lr: 0.000118 grad: 0.1421 (0.1577) loss: 0.7393 (0.7376) time: 0.1566 data: 0.0751 max mem: 9377 +Train: [19] [5700/6250] eta: 0:01:29 lr: 0.000118 grad: 0.1413 (0.1575) loss: 0.7225 (0.7375) time: 0.1481 data: 0.0503 max mem: 9377 +Train: [19] [5800/6250] eta: 0:01:13 lr: 0.000118 grad: 0.1411 (0.1573) loss: 0.7295 (0.7374) time: 0.1525 data: 0.0576 max mem: 9377 +Train: [19] [5900/6250] eta: 0:00:56 lr: 0.000118 grad: 0.1502 (0.1572) loss: 0.7328 (0.7373) time: 0.1597 data: 0.0704 max mem: 9377 +Train: [19] [6000/6250] eta: 0:00:40 lr: 0.000118 grad: 0.1361 (0.1571) loss: 0.7344 (0.7372) time: 0.1371 data: 0.0493 max mem: 9377 +Train: [19] [6100/6250] eta: 0:00:24 lr: 0.000117 grad: 0.1415 (0.1570) loss: 0.7328 (0.7370) time: 0.1532 data: 0.0572 max mem: 9377 +Train: [19] [6200/6250] eta: 0:00:08 lr: 0.000117 grad: 0.1426 (0.1568) loss: 0.7300 (0.7370) time: 0.1661 data: 0.0819 max mem: 9377 +Train: [19] [6249/6250] eta: 0:00:00 lr: 0.000117 grad: 0.1415 (0.1567) loss: 0.7394 (0.7369) time: 0.1361 data: 0.0429 max mem: 9377 +Train: [19] Total time: 0:16:59 (0.1632 s / it) +Averaged stats: lr: 0.000117 grad: 0.1415 (0.1567) loss: 0.7394 (0.7369) +Eval (hcp-train-subset): [19] [ 0/62] eta: 0:03:20 loss: 0.8673 (0.8673) time: 3.2410 data: 3.1549 max mem: 9377 +Eval (hcp-train-subset): [19] [61/62] eta: 0:00:00 loss: 0.8739 (0.8715) time: 0.1338 data: 0.1068 max mem: 9377 +Eval (hcp-train-subset): [19] Total time: 0:00:14 (0.2357 s / it) +Averaged stats (hcp-train-subset): loss: 0.8739 (0.8715) +Making plots (hcp-train-subset): example=57 +Eval (hcp-val): [19] [ 0/62] eta: 0:04:20 loss: 0.8733 (0.8733) time: 4.1936 data: 4.0944 max mem: 9377 +Eval (hcp-val): [19] [61/62] eta: 0:00:00 loss: 0.8704 (0.8713) time: 0.1429 data: 0.1178 max mem: 9377 +Eval (hcp-val): [19] Total time: 0:00:14 (0.2347 s / it) +Averaged stats (hcp-val): loss: 0.8704 (0.8713) +Making plots (hcp-val): example=6 +Eval (nsd-val): [19] [ 0/62] eta: 0:07:40 loss: 0.8381 (0.8381) time: 7.4238 data: 7.3927 max mem: 9377 +Eval (nsd-val): [19] [61/62] eta: 0:00:00 loss: 0.8520 (0.8531) time: 0.1586 data: 0.1329 max mem: 9377 +Eval (nsd-val): [19] Total time: 0:00:17 (0.2845 s / it) +Averaged stats (nsd-val): loss: 0.8520 (0.8531) +Making plots (nsd-val): example=22 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-00019.pth +Train: [20] [ 0/6250] eta: 10:56:43 lr: 0.000117 grad: 0.1033 (0.1033) loss: 0.8340 (0.8340) time: 6.3046 data: 6.0997 max mem: 9377 +Train: [20] [ 100/6250] eta: 0:24:51 lr: 0.000117 grad: 0.2311 (0.2900) loss: 0.7332 (0.7393) time: 0.2106 data: 0.1129 max mem: 9377 +Train: [20] [ 200/6250] eta: 0:20:54 lr: 0.000117 grad: 0.2136 (0.2774) loss: 0.7049 (0.7305) time: 0.1682 data: 0.0733 max mem: 9377 +Train: [20] [ 300/6250] eta: 0:19:53 lr: 0.000117 grad: 0.2041 (0.2580) loss: 0.7236 (0.7340) time: 0.2120 data: 0.1139 max mem: 9377 +Train: [20] [ 400/6250] eta: 0:19:10 lr: 0.000117 grad: 0.2272 (0.2540) loss: 0.7382 (0.7354) time: 0.1956 data: 0.1044 max mem: 9377 +Train: [20] [ 500/6250] eta: 0:18:30 lr: 0.000117 grad: 0.1842 (0.2421) loss: 0.7548 (0.7375) time: 0.2077 data: 0.1024 max mem: 9377 +Train: [20] [ 600/6250] eta: 0:17:57 lr: 0.000117 grad: 0.1590 (0.2307) loss: 0.7612 (0.7399) time: 0.1748 data: 0.0721 max mem: 9377 +Train: [20] [ 700/6250] eta: 0:17:38 lr: 0.000117 grad: 0.1354 (0.2191) loss: 0.7564 (0.7408) time: 0.1315 data: 0.0238 max mem: 9377 +Train: [20] [ 800/6250] eta: 0:17:03 lr: 0.000117 grad: 0.1407 (0.2099) loss: 0.7358 (0.7410) time: 0.1836 data: 0.0907 max mem: 9377 +Train: [20] [ 900/6250] eta: 0:16:33 lr: 0.000117 grad: 0.1415 (0.2026) loss: 0.7390 (0.7412) time: 0.1985 data: 0.0910 max mem: 9377 +Train: [20] [1000/6250] eta: 0:16:00 lr: 0.000117 grad: 0.1373 (0.1974) loss: 0.7384 (0.7411) time: 0.1675 data: 0.0678 max mem: 9377 +Train: [20] [1100/6250] eta: 0:15:25 lr: 0.000117 grad: 0.1451 (0.1927) loss: 0.7267 (0.7406) time: 0.1354 data: 0.0545 max mem: 9377 +Train: [20] [1200/6250] eta: 0:14:59 lr: 0.000117 grad: 0.1485 (0.1893) loss: 0.7252 (0.7404) time: 0.1802 data: 0.0862 max mem: 9377 +Train: [20] [1300/6250] eta: 0:14:31 lr: 0.000117 grad: 0.1443 (0.1865) loss: 0.7315 (0.7405) time: 0.1450 data: 0.0611 max mem: 9377 +Train: [20] [1400/6250] eta: 0:14:16 lr: 0.000117 grad: 0.1349 (0.1836) loss: 0.7343 (0.7406) time: 0.1826 data: 0.0970 max mem: 9377 +Train: [20] [1500/6250] eta: 0:13:50 lr: 0.000117 grad: 0.1405 (0.1809) loss: 0.7402 (0.7406) time: 0.1569 data: 0.0674 max mem: 9377 +Train: [20] [1600/6250] eta: 0:13:27 lr: 0.000117 grad: 0.1431 (0.1787) loss: 0.7422 (0.7404) time: 0.1656 data: 0.0853 max mem: 9377 +Train: [20] [1700/6250] eta: 0:13:01 lr: 0.000117 grad: 0.1537 (0.1770) loss: 0.7366 (0.7403) time: 0.1377 data: 0.0464 max mem: 9377 +Train: [20] [1800/6250] eta: 0:12:37 lr: 0.000117 grad: 0.1340 (0.1755) loss: 0.7546 (0.7402) time: 0.1638 data: 0.0742 max mem: 9377 +Train: [20] [1900/6250] eta: 0:12:18 lr: 0.000117 grad: 0.1375 (0.1741) loss: 0.7357 (0.7401) time: 0.1528 data: 0.0715 max mem: 9377 +Train: [20] [2000/6250] eta: 0:11:59 lr: 0.000117 grad: 0.1424 (0.1724) loss: 0.7541 (0.7401) time: 0.1649 data: 0.0709 max mem: 9377 +Train: [20] [2100/6250] eta: 0:11:36 lr: 0.000117 grad: 0.1475 (0.1711) loss: 0.7419 (0.7402) time: 0.1420 data: 0.0378 max mem: 9377 +Train: [20] [2200/6250] eta: 0:11:17 lr: 0.000117 grad: 0.1396 (0.1699) loss: 0.7273 (0.7400) time: 0.1722 data: 0.0728 max mem: 9377 +Train: [20] [2300/6250] eta: 0:10:56 lr: 0.000117 grad: 0.1433 (0.1687) loss: 0.7474 (0.7402) time: 0.1501 data: 0.0554 max mem: 9377 +Train: [20] [2400/6250] eta: 0:10:37 lr: 0.000117 grad: 0.1417 (0.1676) loss: 0.7429 (0.7403) time: 0.1662 data: 0.0783 max mem: 9377 +Train: [20] [2500/6250] eta: 0:10:20 lr: 0.000117 grad: 0.1502 (0.1666) loss: 0.7205 (0.7403) time: 0.1662 data: 0.0799 max mem: 9377 +Train: [20] [2600/6250] eta: 0:10:02 lr: 0.000117 grad: 0.1472 (0.1658) loss: 0.7363 (0.7400) time: 0.1414 data: 0.0597 max mem: 9377 +Train: [20] [2700/6250] eta: 0:09:45 lr: 0.000117 grad: 0.1326 (0.1650) loss: 0.7433 (0.7399) time: 0.1771 data: 0.0915 max mem: 9377 +Train: [20] [2800/6250] eta: 0:09:28 lr: 0.000117 grad: 0.1372 (0.1643) loss: 0.7459 (0.7398) time: 0.1973 data: 0.1066 max mem: 9377 +Train: [20] [2900/6250] eta: 0:09:11 lr: 0.000117 grad: 0.1432 (0.1636) loss: 0.7349 (0.7398) time: 0.1643 data: 0.0737 max mem: 9377 +Train: [20] [3000/6250] eta: 0:08:54 lr: 0.000117 grad: 0.1377 (0.1630) loss: 0.7360 (0.7396) time: 0.1676 data: 0.0787 max mem: 9377 +Train: [20] [3100/6250] eta: 0:08:37 lr: 0.000117 grad: 0.1446 (0.1624) loss: 0.7520 (0.7396) time: 0.1726 data: 0.0887 max mem: 9377 +Train: [20] [3200/6250] eta: 0:08:20 lr: 0.000117 grad: 0.1535 (0.1620) loss: 0.7307 (0.7394) time: 0.1448 data: 0.0592 max mem: 9377 +Train: [20] [3300/6250] eta: 0:08:02 lr: 0.000117 grad: 0.1419 (0.1617) loss: 0.7338 (0.7392) time: 0.1391 data: 0.0542 max mem: 9377 +Train: [20] [3400/6250] eta: 0:07:46 lr: 0.000117 grad: 0.1377 (0.1611) loss: 0.7210 (0.7390) time: 0.1768 data: 0.0914 max mem: 9377 +Train: [20] [3500/6250] eta: 0:07:29 lr: 0.000117 grad: 0.1423 (0.1607) loss: 0.7286 (0.7388) time: 0.1473 data: 0.0577 max mem: 9377 +Train: [20] [3600/6250] eta: 0:07:12 lr: 0.000117 grad: 0.1490 (0.1604) loss: 0.7320 (0.7385) time: 0.1523 data: 0.0681 max mem: 9377 +Train: [20] [3700/6250] eta: 0:06:55 lr: 0.000117 grad: 0.1440 (0.1601) loss: 0.7372 (0.7383) time: 0.1782 data: 0.0957 max mem: 9377 +Train: [20] [3800/6250] eta: 0:06:39 lr: 0.000117 grad: 0.1468 (0.1598) loss: 0.7156 (0.7381) time: 0.2143 data: 0.1261 max mem: 9377 +Train: [20] [3900/6250] eta: 0:06:22 lr: 0.000117 grad: 0.1539 (0.1595) loss: 0.7236 (0.7378) time: 0.1561 data: 0.0683 max mem: 9377 +Train: [20] [4000/6250] eta: 0:06:05 lr: 0.000117 grad: 0.1448 (0.1594) loss: 0.7251 (0.7375) time: 0.1582 data: 0.0739 max mem: 9377 +Train: [20] [4100/6250] eta: 0:05:49 lr: 0.000117 grad: 0.1503 (0.1593) loss: 0.7156 (0.7372) time: 0.1513 data: 0.0568 max mem: 9377 +Train: [20] [4200/6250] eta: 0:05:33 lr: 0.000117 grad: 0.1548 (0.1592) loss: 0.7118 (0.7369) time: 0.1694 data: 0.0788 max mem: 9377 +Train: [20] [4300/6250] eta: 0:05:16 lr: 0.000117 grad: 0.1619 (0.1591) loss: 0.7206 (0.7367) time: 0.1517 data: 0.0592 max mem: 9377 +Train: [20] [4400/6250] eta: 0:04:59 lr: 0.000117 grad: 0.1527 (0.1589) loss: 0.7147 (0.7363) time: 0.1441 data: 0.0556 max mem: 9377 +Train: [20] [4500/6250] eta: 0:04:43 lr: 0.000117 grad: 0.1443 (0.1587) loss: 0.7136 (0.7360) time: 0.1555 data: 0.0624 max mem: 9377 +Train: [20] [4600/6250] eta: 0:04:26 lr: 0.000117 grad: 0.1434 (0.1584) loss: 0.7401 (0.7359) time: 0.1666 data: 0.0743 max mem: 9377 +Train: [20] [4700/6250] eta: 0:04:10 lr: 0.000117 grad: 0.1435 (0.1582) loss: 0.7243 (0.7357) time: 0.1681 data: 0.0815 max mem: 9377 +Train: [20] [4800/6250] eta: 0:03:54 lr: 0.000117 grad: 0.1586 (0.1580) loss: 0.7267 (0.7355) time: 0.1634 data: 0.0774 max mem: 9377 +Train: [20] [4900/6250] eta: 0:03:38 lr: 0.000117 grad: 0.1497 (0.1578) loss: 0.7319 (0.7354) time: 0.1473 data: 0.0593 max mem: 9377 +Train: [20] [5000/6250] eta: 0:03:21 lr: 0.000117 grad: 0.1446 (0.1577) loss: 0.7345 (0.7354) time: 0.1428 data: 0.0551 max mem: 9377 +Train: [20] [5100/6250] eta: 0:03:05 lr: 0.000117 grad: 0.1478 (0.1575) loss: 0.7265 (0.7354) time: 0.1190 data: 0.0250 max mem: 9377 +Train: [20] [5200/6250] eta: 0:02:49 lr: 0.000117 grad: 0.1493 (0.1573) loss: 0.7215 (0.7353) time: 0.1692 data: 0.0781 max mem: 9377 +Train: [20] [5300/6250] eta: 0:02:33 lr: 0.000117 grad: 0.1408 (0.1572) loss: 0.7374 (0.7352) time: 0.1481 data: 0.0655 max mem: 9377 +Train: [20] [5400/6250] eta: 0:02:16 lr: 0.000117 grad: 0.1467 (0.1570) loss: 0.7247 (0.7352) time: 0.1470 data: 0.0641 max mem: 9377 +Train: [20] [5500/6250] eta: 0:02:00 lr: 0.000117 grad: 0.1379 (0.1568) loss: 0.7342 (0.7351) time: 0.1742 data: 0.0947 max mem: 9377 +Train: [20] [5600/6250] eta: 0:01:44 lr: 0.000117 grad: 0.1526 (0.1566) loss: 0.7500 (0.7350) time: 0.1563 data: 0.0690 max mem: 9377 +Train: [20] [5700/6250] eta: 0:01:28 lr: 0.000117 grad: 0.1390 (0.1564) loss: 0.7329 (0.7350) time: 0.1536 data: 0.0693 max mem: 9377 +Train: [20] [5800/6250] eta: 0:01:12 lr: 0.000117 grad: 0.1511 (0.1563) loss: 0.7134 (0.7348) time: 0.1531 data: 0.0627 max mem: 9377 +Train: [20] [5900/6250] eta: 0:00:56 lr: 0.000117 grad: 0.1467 (0.1563) loss: 0.7287 (0.7346) time: 0.1594 data: 0.0782 max mem: 9377 +Train: [20] [6000/6250] eta: 0:00:40 lr: 0.000116 grad: 0.1432 (0.1561) loss: 0.7373 (0.7346) time: 0.1643 data: 0.0826 max mem: 9377 +Train: [20] [6100/6250] eta: 0:00:24 lr: 0.000116 grad: 0.1612 (0.1561) loss: 0.7355 (0.7345) time: 0.1496 data: 0.0538 max mem: 9377 +Train: [20] [6200/6250] eta: 0:00:08 lr: 0.000116 grad: 0.1405 (0.1559) loss: 0.7314 (0.7345) time: 0.1534 data: 0.0613 max mem: 9377 +Train: [20] [6249/6250] eta: 0:00:00 lr: 0.000116 grad: 0.1444 (0.1559) loss: 0.7305 (0.7344) time: 0.1519 data: 0.0630 max mem: 9377 +Train: [20] Total time: 0:16:51 (0.1618 s / it) +Averaged stats: lr: 0.000116 grad: 0.1444 (0.1559) loss: 0.7305 (0.7344) +Eval (hcp-train-subset): [20] [ 0/62] eta: 0:06:17 loss: 0.8649 (0.8649) time: 6.0890 data: 6.0571 max mem: 9377 +Eval (hcp-train-subset): [20] [61/62] eta: 0:00:00 loss: 0.8706 (0.8736) time: 0.1459 data: 0.1207 max mem: 9377 +Eval (hcp-train-subset): [20] Total time: 0:00:15 (0.2462 s / it) +Averaged stats (hcp-train-subset): loss: 0.8706 (0.8736) +Eval (hcp-val): [20] [ 0/62] eta: 0:03:55 loss: 0.8663 (0.8663) time: 3.8052 data: 3.7229 max mem: 9377 +Eval (hcp-val): [20] [61/62] eta: 0:00:00 loss: 0.8694 (0.8715) time: 0.1519 data: 0.1247 max mem: 9377 +Eval (hcp-val): [20] Total time: 0:00:15 (0.2455 s / it) +Averaged stats (hcp-val): loss: 0.8694 (0.8715) +Eval (nsd-val): [20] [ 0/62] eta: 0:05:50 loss: 0.8517 (0.8517) time: 5.6591 data: 5.6268 max mem: 9377 +Eval (nsd-val): [20] [61/62] eta: 0:00:00 loss: 0.8552 (0.8572) time: 0.1928 data: 0.1623 max mem: 9377 +Eval (nsd-val): [20] Total time: 0:00:16 (0.2626 s / it) +Averaged stats (nsd-val): loss: 0.8552 (0.8572) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [21] [ 0/6250] eta: 12:32:17 lr: 0.000116 grad: 0.3320 (0.3320) loss: 0.6610 (0.6610) time: 7.2220 data: 7.1085 max mem: 9377 +Train: [21] [ 100/6250] eta: 0:23:00 lr: 0.000116 grad: 0.2446 (0.2889) loss: 0.7004 (0.7218) time: 0.1800 data: 0.0919 max mem: 9377 +Train: [21] [ 200/6250] eta: 0:19:29 lr: 0.000116 grad: 0.3002 (0.2921) loss: 0.7368 (0.7143) time: 0.1662 data: 0.0794 max mem: 9377 +Train: [21] [ 300/6250] eta: 0:18:00 lr: 0.000116 grad: 0.2394 (0.2886) loss: 0.7222 (0.7156) time: 0.1726 data: 0.0708 max mem: 9377 +Train: [21] [ 400/6250] eta: 0:17:23 lr: 0.000116 grad: 0.2385 (0.2780) loss: 0.7294 (0.7162) time: 0.1620 data: 0.0693 max mem: 9377 +Train: [21] [ 500/6250] eta: 0:16:42 lr: 0.000116 grad: 0.1876 (0.2687) loss: 0.7187 (0.7166) time: 0.1735 data: 0.0855 max mem: 9377 +Train: [21] [ 600/6250] eta: 0:16:09 lr: 0.000116 grad: 0.1590 (0.2514) loss: 0.7006 (0.7149) time: 0.1543 data: 0.0597 max mem: 9377 +Train: [21] [ 700/6250] eta: 0:15:40 lr: 0.000116 grad: 0.1448 (0.2386) loss: 0.7226 (0.7139) time: 0.1411 data: 0.0509 max mem: 9377 +Train: [21] [ 800/6250] eta: 0:15:16 lr: 0.000116 grad: 0.1512 (0.2291) loss: 0.7105 (0.7135) time: 0.1498 data: 0.0592 max mem: 9377 +Train: [21] [ 900/6250] eta: 0:14:54 lr: 0.000116 grad: 0.1530 (0.2211) loss: 0.7381 (0.7139) time: 0.1556 data: 0.0664 max mem: 9377 +Train: [21] [1000/6250] eta: 0:14:37 lr: 0.000116 grad: 0.1500 (0.2146) loss: 0.7204 (0.7145) time: 0.1466 data: 0.0556 max mem: 9377 +Train: [21] [1100/6250] eta: 0:14:14 lr: 0.000116 grad: 0.1476 (0.2087) loss: 0.7315 (0.7155) time: 0.1488 data: 0.0486 max mem: 9377 +Train: [21] [1200/6250] eta: 0:13:50 lr: 0.000116 grad: 0.1447 (0.2037) loss: 0.7241 (0.7165) time: 0.1328 data: 0.0310 max mem: 9377 +Train: [21] [1300/6250] eta: 0:13:28 lr: 0.000116 grad: 0.1462 (0.1993) loss: 0.7238 (0.7172) time: 0.1591 data: 0.0675 max mem: 9377 +Train: [21] [1400/6250] eta: 0:13:07 lr: 0.000116 grad: 0.1431 (0.1957) loss: 0.7307 (0.7178) time: 0.1429 data: 0.0446 max mem: 9377 +Train: [21] [1500/6250] eta: 0:12:47 lr: 0.000116 grad: 0.1374 (0.1924) loss: 0.7242 (0.7187) time: 0.1423 data: 0.0568 max mem: 9377 +Train: [21] [1600/6250] eta: 0:12:30 lr: 0.000116 grad: 0.1465 (0.1895) loss: 0.7257 (0.7194) time: 0.1524 data: 0.0723 max mem: 9377 +Train: [21] [1700/6250] eta: 0:12:14 lr: 0.000116 grad: 0.1404 (0.1868) loss: 0.7380 (0.7201) time: 0.1580 data: 0.0648 max mem: 9377 +Train: [21] [1800/6250] eta: 0:11:57 lr: 0.000116 grad: 0.1405 (0.1844) loss: 0.7307 (0.7209) time: 0.1356 data: 0.0542 max mem: 9377 +Train: [21] [1900/6250] eta: 0:11:46 lr: 0.000116 grad: 0.1372 (0.1822) loss: 0.7423 (0.7217) time: 0.1736 data: 0.0779 max mem: 9377 +Train: [21] [2000/6250] eta: 0:11:30 lr: 0.000116 grad: 0.1332 (0.1802) loss: 0.7440 (0.7226) time: 0.1641 data: 0.0660 max mem: 9377 +Train: [21] [2100/6250] eta: 0:11:13 lr: 0.000116 grad: 0.1413 (0.1783) loss: 0.7438 (0.7232) time: 0.1425 data: 0.0530 max mem: 9377 +Train: [21] [2200/6250] eta: 0:10:55 lr: 0.000116 grad: 0.1480 (0.1766) loss: 0.7329 (0.7241) time: 0.1496 data: 0.0566 max mem: 9377 +Train: [21] [2300/6250] eta: 0:10:37 lr: 0.000116 grad: 0.1300 (0.1750) loss: 0.7416 (0.7247) time: 0.1464 data: 0.0553 max mem: 9377 +Train: [21] [2400/6250] eta: 0:10:21 lr: 0.000116 grad: 0.1381 (0.1735) loss: 0.7429 (0.7254) time: 0.1766 data: 0.0969 max mem: 9377 +Train: [21] [2500/6250] eta: 0:10:05 lr: 0.000116 grad: 0.1446 (0.1723) loss: 0.7443 (0.7262) time: 0.2084 data: 0.0498 max mem: 9377 +Train: [21] [2600/6250] eta: 0:09:48 lr: 0.000116 grad: 0.1373 (0.1709) loss: 0.7437 (0.7270) time: 0.1542 data: 0.0650 max mem: 9377 +Train: [21] [2700/6250] eta: 0:09:30 lr: 0.000116 grad: 0.1404 (0.1698) loss: 0.7327 (0.7275) time: 0.1493 data: 0.0567 max mem: 9377 +Train: [21] [2800/6250] eta: 0:09:14 lr: 0.000116 grad: 0.1368 (0.1688) loss: 0.7371 (0.7277) time: 0.1390 data: 0.0466 max mem: 9377 +Train: [21] [2900/6250] eta: 0:08:57 lr: 0.000116 grad: 0.1382 (0.1679) loss: 0.7271 (0.7281) time: 0.1302 data: 0.0382 max mem: 9377 +Train: [21] [3000/6250] eta: 0:08:40 lr: 0.000116 grad: 0.1413 (0.1673) loss: 0.7363 (0.7283) time: 0.1543 data: 0.0682 max mem: 9377 +Train: [21] [3100/6250] eta: 0:08:24 lr: 0.000116 grad: 0.1472 (0.1667) loss: 0.7229 (0.7283) time: 0.1509 data: 0.0614 max mem: 9377 +Train: [21] [3200/6250] eta: 0:08:07 lr: 0.000116 grad: 0.1437 (0.1660) loss: 0.7365 (0.7286) time: 0.1439 data: 0.0508 max mem: 9377 +Train: [21] [3300/6250] eta: 0:07:50 lr: 0.000116 grad: 0.1546 (0.1654) loss: 0.7227 (0.7288) time: 0.1404 data: 0.0436 max mem: 9377 +Train: [21] [3400/6250] eta: 0:07:34 lr: 0.000116 grad: 0.1472 (0.1648) loss: 0.7253 (0.7289) time: 0.1717 data: 0.0796 max mem: 9377 +Train: [21] [3500/6250] eta: 0:07:18 lr: 0.000116 grad: 0.1439 (0.1643) loss: 0.7364 (0.7291) time: 0.1466 data: 0.0677 max mem: 9377 +Train: [21] [3600/6250] eta: 0:07:01 lr: 0.000116 grad: 0.1481 (0.1638) loss: 0.7372 (0.7293) time: 0.1480 data: 0.0504 max mem: 9377 +Train: [21] [3700/6250] eta: 0:06:46 lr: 0.000116 grad: 0.1431 (0.1633) loss: 0.7401 (0.7295) time: 0.1791 data: 0.0865 max mem: 9377 +Train: [21] [3800/6250] eta: 0:06:29 lr: 0.000116 grad: 0.1403 (0.1629) loss: 0.7231 (0.7298) time: 0.1524 data: 0.0654 max mem: 9377 +Train: [21] [3900/6250] eta: 0:06:12 lr: 0.000116 grad: 0.1418 (0.1623) loss: 0.7441 (0.7301) time: 0.1348 data: 0.0496 max mem: 9377 +Train: [21] [4000/6250] eta: 0:05:56 lr: 0.000116 grad: 0.1458 (0.1618) loss: 0.7330 (0.7304) time: 0.1451 data: 0.0510 max mem: 9377 +Train: [21] [4100/6250] eta: 0:05:40 lr: 0.000116 grad: 0.1411 (0.1615) loss: 0.7356 (0.7305) time: 0.1807 data: 0.0914 max mem: 9377 +Train: [21] [4200/6250] eta: 0:05:24 lr: 0.000116 grad: 0.1441 (0.1611) loss: 0.7356 (0.7306) time: 0.1521 data: 0.0664 max mem: 9377 +Train: [21] [4300/6250] eta: 0:05:08 lr: 0.000116 grad: 0.1408 (0.1607) loss: 0.7399 (0.7308) time: 0.1649 data: 0.0767 max mem: 9377 +Train: [21] [4400/6250] eta: 0:04:52 lr: 0.000116 grad: 0.1437 (0.1604) loss: 0.7347 (0.7311) time: 0.1642 data: 0.0720 max mem: 9377 +Train: [21] [4500/6250] eta: 0:04:36 lr: 0.000116 grad: 0.1400 (0.1600) loss: 0.7505 (0.7313) time: 0.1565 data: 0.0683 max mem: 9377 +Train: [21] [4600/6250] eta: 0:04:20 lr: 0.000116 grad: 0.1394 (0.1596) loss: 0.7425 (0.7316) time: 0.1476 data: 0.0576 max mem: 9377 +Train: [21] [4700/6250] eta: 0:04:04 lr: 0.000116 grad: 0.1403 (0.1592) loss: 0.7287 (0.7317) time: 0.1568 data: 0.0681 max mem: 9377 +Train: [21] [4800/6250] eta: 0:03:48 lr: 0.000116 grad: 0.1446 (0.1590) loss: 0.7296 (0.7317) time: 0.1645 data: 0.0765 max mem: 9377 +Train: [21] [4900/6250] eta: 0:03:32 lr: 0.000116 grad: 0.1428 (0.1588) loss: 0.7207 (0.7317) time: 0.1532 data: 0.0644 max mem: 9377 +Train: [21] [5000/6250] eta: 0:03:17 lr: 0.000116 grad: 0.1479 (0.1586) loss: 0.7247 (0.7317) time: 0.1731 data: 0.0924 max mem: 9377 +Train: [21] [5100/6250] eta: 0:03:01 lr: 0.000116 grad: 0.1435 (0.1584) loss: 0.7269 (0.7315) time: 0.1566 data: 0.0633 max mem: 9377 +Train: [21] [5200/6250] eta: 0:02:45 lr: 0.000116 grad: 0.1591 (0.1583) loss: 0.7143 (0.7314) time: 0.1549 data: 0.0661 max mem: 9377 +Train: [21] [5300/6250] eta: 0:02:29 lr: 0.000116 grad: 0.1408 (0.1581) loss: 0.7349 (0.7312) time: 0.1532 data: 0.0614 max mem: 9377 +Train: [21] [5400/6250] eta: 0:02:13 lr: 0.000116 grad: 0.1465 (0.1579) loss: 0.7142 (0.7310) time: 0.1628 data: 0.0790 max mem: 9377 +Train: [21] [5500/6250] eta: 0:01:58 lr: 0.000116 grad: 0.1579 (0.1579) loss: 0.7183 (0.7308) time: 0.1650 data: 0.0769 max mem: 9377 +Train: [21] [5600/6250] eta: 0:01:42 lr: 0.000115 grad: 0.1509 (0.1578) loss: 0.7158 (0.7306) time: 0.1423 data: 0.0456 max mem: 9377 +Train: [21] [5700/6250] eta: 0:01:26 lr: 0.000115 grad: 0.1336 (0.1577) loss: 0.7307 (0.7304) time: 0.1666 data: 0.0678 max mem: 9377 +Train: [21] [5800/6250] eta: 0:01:10 lr: 0.000115 grad: 0.1529 (0.1575) loss: 0.7173 (0.7302) time: 0.1587 data: 0.0745 max mem: 9377 +Train: [21] [5900/6250] eta: 0:00:55 lr: 0.000115 grad: 0.1478 (0.1573) loss: 0.7264 (0.7302) time: 0.1537 data: 0.0608 max mem: 9377 +Train: [21] [6000/6250] eta: 0:00:39 lr: 0.000115 grad: 0.1462 (0.1571) loss: 0.7245 (0.7302) time: 0.1670 data: 0.0776 max mem: 9377 +Train: [21] [6100/6250] eta: 0:00:23 lr: 0.000115 grad: 0.1464 (0.1570) loss: 0.7261 (0.7301) time: 0.1479 data: 0.0568 max mem: 9377 +Train: [21] [6200/6250] eta: 0:00:07 lr: 0.000115 grad: 0.1429 (0.1569) loss: 0.7284 (0.7301) time: 0.2313 data: 0.1433 max mem: 9377 +Train: [21] [6249/6250] eta: 0:00:00 lr: 0.000115 grad: 0.1409 (0.1568) loss: 0.7386 (0.7301) time: 0.1766 data: 0.0856 max mem: 9377 +Train: [21] Total time: 0:16:30 (0.1585 s / it) +Averaged stats: lr: 0.000115 grad: 0.1409 (0.1568) loss: 0.7386 (0.7301) +Eval (hcp-train-subset): [21] [ 0/62] eta: 0:04:55 loss: 0.8661 (0.8661) time: 4.7682 data: 4.7343 max mem: 9377 +Eval (hcp-train-subset): [21] [61/62] eta: 0:00:00 loss: 0.8746 (0.8735) time: 0.1201 data: 0.0931 max mem: 9377 +Eval (hcp-train-subset): [21] Total time: 0:00:14 (0.2361 s / it) +Averaged stats (hcp-train-subset): loss: 0.8746 (0.8735) +Eval (hcp-val): [21] [ 0/62] eta: 0:04:12 loss: 0.8654 (0.8654) time: 4.0672 data: 3.9945 max mem: 9377 +Eval (hcp-val): [21] [61/62] eta: 0:00:00 loss: 0.8715 (0.8725) time: 0.1159 data: 0.0907 max mem: 9377 +Eval (hcp-val): [21] Total time: 0:00:14 (0.2270 s / it) +Averaged stats (hcp-val): loss: 0.8715 (0.8725) +Eval (nsd-val): [21] [ 0/62] eta: 0:03:13 loss: 0.8472 (0.8472) time: 3.1255 data: 3.0371 max mem: 9377 +Eval (nsd-val): [21] [61/62] eta: 0:00:00 loss: 0.8523 (0.8540) time: 0.1242 data: 0.0988 max mem: 9377 +Eval (nsd-val): [21] Total time: 0:00:13 (0.2232 s / it) +Averaged stats (nsd-val): loss: 0.8523 (0.8540) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [22] [ 0/6250] eta: 11:39:45 lr: 0.000115 grad: 0.3094 (0.3094) loss: 0.6383 (0.6383) time: 6.7177 data: 6.6289 max mem: 9377 +Train: [22] [ 100/6250] eta: 0:22:46 lr: 0.000115 grad: 0.2895 (0.3653) loss: 0.7826 (0.7628) time: 0.1744 data: 0.0834 max mem: 9377 +Train: [22] [ 200/6250] eta: 0:19:32 lr: 0.000115 grad: 0.2347 (0.3159) loss: 0.7491 (0.7594) time: 0.1521 data: 0.0611 max mem: 9377 +Train: [22] [ 300/6250] eta: 0:18:02 lr: 0.000115 grad: 0.2017 (0.2875) loss: 0.7354 (0.7541) time: 0.1516 data: 0.0724 max mem: 9377 +Train: [22] [ 400/6250] eta: 0:17:30 lr: 0.000115 grad: 0.1620 (0.2624) loss: 0.7407 (0.7523) time: 0.1708 data: 0.0778 max mem: 9377 +Train: [22] [ 500/6250] eta: 0:16:54 lr: 0.000115 grad: 0.1521 (0.2429) loss: 0.7390 (0.7484) time: 0.1544 data: 0.0595 max mem: 9377 +Train: [22] [ 600/6250] eta: 0:16:20 lr: 0.000115 grad: 0.1555 (0.2294) loss: 0.7406 (0.7466) time: 0.1468 data: 0.0559 max mem: 9377 +Train: [22] [ 700/6250] eta: 0:15:53 lr: 0.000115 grad: 0.1498 (0.2179) loss: 0.7346 (0.7449) time: 0.1527 data: 0.0544 max mem: 9377 +Train: [22] [ 800/6250] eta: 0:15:19 lr: 0.000115 grad: 0.1470 (0.2090) loss: 0.7315 (0.7431) time: 0.1399 data: 0.0444 max mem: 9377 +Train: [22] [ 900/6250] eta: 0:14:56 lr: 0.000115 grad: 0.1476 (0.2023) loss: 0.7284 (0.7413) time: 0.1399 data: 0.0464 max mem: 9377 +Train: [22] [1000/6250] eta: 0:14:39 lr: 0.000115 grad: 0.1442 (0.1967) loss: 0.7242 (0.7405) time: 0.1444 data: 0.0516 max mem: 9377 +Train: [22] [1100/6250] eta: 0:14:15 lr: 0.000115 grad: 0.1380 (0.1925) loss: 0.7194 (0.7394) time: 0.1379 data: 0.0502 max mem: 9377 +Train: [22] [1200/6250] eta: 0:13:55 lr: 0.000115 grad: 0.1426 (0.1884) loss: 0.7351 (0.7389) time: 0.1614 data: 0.0757 max mem: 9377 +Train: [22] [1300/6250] eta: 0:13:43 lr: 0.000115 grad: 0.1474 (0.1861) loss: 0.7447 (0.7385) time: 0.1677 data: 0.0723 max mem: 9377 +Train: [22] [1400/6250] eta: 0:13:27 lr: 0.000115 grad: 0.1443 (0.1838) loss: 0.7271 (0.7376) time: 0.1576 data: 0.0599 max mem: 9377 +Train: [22] [1500/6250] eta: 0:13:08 lr: 0.000115 grad: 0.1397 (0.1815) loss: 0.7369 (0.7372) time: 0.1719 data: 0.0808 max mem: 9377 +Train: [22] [1600/6250] eta: 0:12:54 lr: 0.000115 grad: 0.1398 (0.1793) loss: 0.7303 (0.7369) time: 0.1795 data: 0.0890 max mem: 9377 +Train: [22] [1700/6250] eta: 0:12:34 lr: 0.000115 grad: 0.1447 (0.1772) loss: 0.7177 (0.7361) time: 0.1744 data: 0.0912 max mem: 9377 +Train: [22] [1800/6250] eta: 0:12:16 lr: 0.000115 grad: 0.1457 (0.1758) loss: 0.7138 (0.7352) time: 0.1544 data: 0.0658 max mem: 9377 +Train: [22] [1900/6250] eta: 0:11:58 lr: 0.000115 grad: 0.1466 (0.1745) loss: 0.7173 (0.7345) time: 0.1566 data: 0.0751 max mem: 9377 +Train: [22] [2000/6250] eta: 0:11:42 lr: 0.000115 grad: 0.1478 (0.1736) loss: 0.7204 (0.7337) time: 0.1695 data: 0.0722 max mem: 9377 +Train: [22] [2100/6250] eta: 0:11:25 lr: 0.000115 grad: 0.1504 (0.1728) loss: 0.7150 (0.7329) time: 0.1640 data: 0.0692 max mem: 9377 +Train: [22] [2200/6250] eta: 0:11:07 lr: 0.000115 grad: 0.1598 (0.1719) loss: 0.7045 (0.7323) time: 0.1673 data: 0.0767 max mem: 9377 +Train: [22] [2300/6250] eta: 0:10:48 lr: 0.000115 grad: 0.1549 (0.1713) loss: 0.7082 (0.7314) time: 0.1525 data: 0.0640 max mem: 9377 +Train: [22] [2400/6250] eta: 0:10:31 lr: 0.000115 grad: 0.1506 (0.1705) loss: 0.7123 (0.7307) time: 0.1534 data: 0.0663 max mem: 9377 +Train: [22] [2500/6250] eta: 0:10:13 lr: 0.000115 grad: 0.1533 (0.1700) loss: 0.7162 (0.7299) time: 0.1582 data: 0.0605 max mem: 9377 +Train: [22] [2600/6250] eta: 0:09:55 lr: 0.000115 grad: 0.1401 (0.1692) loss: 0.7221 (0.7294) time: 0.1523 data: 0.0638 max mem: 9377 +Train: [22] [2700/6250] eta: 0:09:37 lr: 0.000115 grad: 0.1418 (0.1685) loss: 0.7277 (0.7288) time: 0.1624 data: 0.0780 max mem: 9377 +Train: [22] [2800/6250] eta: 0:09:21 lr: 0.000115 grad: 0.1397 (0.1677) loss: 0.7219 (0.7284) time: 0.1552 data: 0.0661 max mem: 9377 +Train: [22] [2900/6250] eta: 0:09:03 lr: 0.000115 grad: 0.1489 (0.1670) loss: 0.7255 (0.7280) time: 0.1599 data: 0.0788 max mem: 9377 +Train: [22] [3000/6250] eta: 0:08:46 lr: 0.000115 grad: 0.1506 (0.1665) loss: 0.7145 (0.7275) time: 0.1797 data: 0.1003 max mem: 9377 +Train: [22] [3100/6250] eta: 0:08:29 lr: 0.000115 grad: 0.1453 (0.1659) loss: 0.7133 (0.7271) time: 0.1519 data: 0.0661 max mem: 9377 +Train: [22] [3200/6250] eta: 0:08:12 lr: 0.000115 grad: 0.1524 (0.1656) loss: 0.7214 (0.7270) time: 0.1431 data: 0.0494 max mem: 9377 +Train: [22] [3300/6250] eta: 0:07:55 lr: 0.000115 grad: 0.1414 (0.1652) loss: 0.7144 (0.7266) time: 0.1516 data: 0.0619 max mem: 9377 +Train: [22] [3400/6250] eta: 0:07:38 lr: 0.000115 grad: 0.1487 (0.1647) loss: 0.7286 (0.7264) time: 0.1391 data: 0.0493 max mem: 9377 +Train: [22] [3500/6250] eta: 0:07:22 lr: 0.000115 grad: 0.1469 (0.1643) loss: 0.7182 (0.7262) time: 0.1724 data: 0.0890 max mem: 9377 +Train: [22] [3600/6250] eta: 0:07:05 lr: 0.000115 grad: 0.1459 (0.1639) loss: 0.7092 (0.7259) time: 0.1666 data: 0.0833 max mem: 9377 +Train: [22] [3700/6250] eta: 0:06:48 lr: 0.000115 grad: 0.1401 (0.1635) loss: 0.7216 (0.7256) time: 0.1458 data: 0.0630 max mem: 9377 +Train: [22] [3800/6250] eta: 0:06:31 lr: 0.000115 grad: 0.1495 (0.1631) loss: 0.7157 (0.7255) time: 0.1699 data: 0.0841 max mem: 9377 +Train: [22] [3900/6250] eta: 0:06:15 lr: 0.000115 grad: 0.1533 (0.1627) loss: 0.7159 (0.7253) time: 0.1457 data: 0.0520 max mem: 9377 +Train: [22] [4000/6250] eta: 0:05:59 lr: 0.000115 grad: 0.1407 (0.1623) loss: 0.7293 (0.7253) time: 0.1404 data: 0.0430 max mem: 9377 +Train: [22] [4100/6250] eta: 0:05:42 lr: 0.000115 grad: 0.1466 (0.1619) loss: 0.7388 (0.7253) time: 0.1486 data: 0.0616 max mem: 9377 +Train: [22] [4200/6250] eta: 0:05:26 lr: 0.000115 grad: 0.1457 (0.1616) loss: 0.7217 (0.7253) time: 0.1591 data: 0.0757 max mem: 9377 +Train: [22] [4300/6250] eta: 0:05:10 lr: 0.000115 grad: 0.1458 (0.1612) loss: 0.7130 (0.7252) time: 0.1621 data: 0.0756 max mem: 9377 +Train: [22] [4400/6250] eta: 0:04:54 lr: 0.000115 grad: 0.1418 (0.1610) loss: 0.7348 (0.7251) time: 0.1572 data: 0.0678 max mem: 9377 +Train: [22] [4500/6250] eta: 0:04:38 lr: 0.000115 grad: 0.1534 (0.1607) loss: 0.7254 (0.7251) time: 0.1812 data: 0.0930 max mem: 9377 +Train: [22] [4600/6250] eta: 0:04:22 lr: 0.000115 grad: 0.1577 (0.1605) loss: 0.7053 (0.7248) time: 0.1526 data: 0.0782 max mem: 9377 +Train: [22] [4700/6250] eta: 0:04:06 lr: 0.000115 grad: 0.1479 (0.1604) loss: 0.7217 (0.7247) time: 0.1526 data: 0.0588 max mem: 9377 +Train: [22] [4800/6250] eta: 0:03:50 lr: 0.000115 grad: 0.1416 (0.1602) loss: 0.7232 (0.7246) time: 0.1638 data: 0.0772 max mem: 9377 +Train: [22] [4900/6250] eta: 0:03:33 lr: 0.000114 grad: 0.1458 (0.1601) loss: 0.7262 (0.7245) time: 0.1503 data: 0.0641 max mem: 9377 +Train: [22] [5000/6250] eta: 0:03:18 lr: 0.000114 grad: 0.1465 (0.1599) loss: 0.7332 (0.7244) time: 0.1303 data: 0.0444 max mem: 9377 +Train: [22] [5100/6250] eta: 0:03:02 lr: 0.000114 grad: 0.1510 (0.1597) loss: 0.7240 (0.7244) time: 0.1773 data: 0.0945 max mem: 9377 +Train: [22] [5200/6250] eta: 0:02:46 lr: 0.000114 grad: 0.1498 (0.1595) loss: 0.7124 (0.7242) time: 0.1374 data: 0.0537 max mem: 9377 +Train: [22] [5300/6250] eta: 0:02:30 lr: 0.000114 grad: 0.1465 (0.1593) loss: 0.7063 (0.7241) time: 0.1152 data: 0.0190 max mem: 9377 +Train: [22] [5400/6250] eta: 0:02:14 lr: 0.000114 grad: 0.1472 (0.1591) loss: 0.7017 (0.7240) time: 0.1405 data: 0.0520 max mem: 9377 +Train: [22] [5500/6250] eta: 0:01:58 lr: 0.000114 grad: 0.1420 (0.1590) loss: 0.7295 (0.7239) time: 0.1281 data: 0.0394 max mem: 9377 +Train: [22] [5600/6250] eta: 0:01:43 lr: 0.000114 grad: 0.1403 (0.1588) loss: 0.7199 (0.7238) time: 0.1770 data: 0.0890 max mem: 9377 +Train: [22] [5700/6250] eta: 0:01:27 lr: 0.000114 grad: 0.1420 (0.1586) loss: 0.7155 (0.7238) time: 0.1629 data: 0.0695 max mem: 9377 +Train: [22] [5800/6250] eta: 0:01:11 lr: 0.000114 grad: 0.1409 (0.1584) loss: 0.7212 (0.7237) time: 0.1611 data: 0.0823 max mem: 9377 +Train: [22] [5900/6250] eta: 0:00:55 lr: 0.000114 grad: 0.1492 (0.1582) loss: 0.7097 (0.7236) time: 0.1709 data: 0.0733 max mem: 9377 +Train: [22] [6000/6250] eta: 0:00:39 lr: 0.000114 grad: 0.1515 (0.1581) loss: 0.7293 (0.7236) time: 0.1419 data: 0.0440 max mem: 9377 +Train: [22] [6100/6250] eta: 0:00:23 lr: 0.000114 grad: 0.1490 (0.1579) loss: 0.7287 (0.7236) time: 0.1611 data: 0.0609 max mem: 9377 +Train: [22] [6200/6250] eta: 0:00:07 lr: 0.000114 grad: 0.1571 (0.1579) loss: 0.7208 (0.7236) time: 0.1427 data: 0.0512 max mem: 9377 +Train: [22] [6249/6250] eta: 0:00:00 lr: 0.000114 grad: 0.1435 (0.1578) loss: 0.7243 (0.7236) time: 0.1443 data: 0.0546 max mem: 9377 +Train: [22] Total time: 0:16:33 (0.1589 s / it) +Averaged stats: lr: 0.000114 grad: 0.1435 (0.1578) loss: 0.7243 (0.7236) +Eval (hcp-train-subset): [22] [ 0/62] eta: 0:03:30 loss: 0.8691 (0.8691) time: 3.3932 data: 3.3120 max mem: 9377 +Eval (hcp-train-subset): [22] [61/62] eta: 0:00:00 loss: 0.8722 (0.8760) time: 0.1157 data: 0.0888 max mem: 9377 +Eval (hcp-train-subset): [22] Total time: 0:00:14 (0.2307 s / it) +Averaged stats (hcp-train-subset): loss: 0.8722 (0.8760) +Eval (hcp-val): [22] [ 0/62] eta: 0:04:24 loss: 0.8752 (0.8752) time: 4.2600 data: 4.1919 max mem: 9377 +Eval (hcp-val): [22] [61/62] eta: 0:00:00 loss: 0.8744 (0.8751) time: 0.1374 data: 0.1119 max mem: 9377 +Eval (hcp-val): [22] Total time: 0:00:14 (0.2367 s / it) +Averaged stats (hcp-val): loss: 0.8744 (0.8751) +Eval (nsd-val): [22] [ 0/62] eta: 0:05:55 loss: 0.8508 (0.8508) time: 5.7323 data: 5.7011 max mem: 9377 +Eval (nsd-val): [22] [61/62] eta: 0:00:00 loss: 0.8575 (0.8584) time: 0.1435 data: 0.1180 max mem: 9377 +Eval (nsd-val): [22] Total time: 0:00:14 (0.2261 s / it) +Averaged stats (nsd-val): loss: 0.8575 (0.8584) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [23] [ 0/6250] eta: 9:32:25 lr: 0.000114 grad: 0.2405 (0.2405) loss: 0.7559 (0.7559) time: 5.4953 data: 5.3122 max mem: 9377 +Train: [23] [ 100/6250] eta: 0:22:39 lr: 0.000114 grad: 0.2283 (0.2942) loss: 0.7200 (0.7348) time: 0.1456 data: 0.0597 max mem: 9377 +Train: [23] [ 200/6250] eta: 0:19:34 lr: 0.000114 grad: 0.2263 (0.2887) loss: 0.7139 (0.7384) time: 0.1357 data: 0.0461 max mem: 9377 +Train: [23] [ 300/6250] eta: 0:18:11 lr: 0.000114 grad: 0.2369 (0.2846) loss: 0.7270 (0.7387) time: 0.1485 data: 0.0655 max mem: 9377 +Train: [23] [ 400/6250] eta: 0:17:16 lr: 0.000114 grad: 0.1783 (0.2661) loss: 0.7242 (0.7360) time: 0.1798 data: 0.0849 max mem: 9377 +Train: [23] [ 500/6250] eta: 0:16:42 lr: 0.000114 grad: 0.1655 (0.2486) loss: 0.7196 (0.7348) time: 0.1582 data: 0.0721 max mem: 9377 +Train: [23] [ 600/6250] eta: 0:16:07 lr: 0.000114 grad: 0.1794 (0.2385) loss: 0.7459 (0.7345) time: 0.1558 data: 0.0555 max mem: 9377 +Train: [23] [ 700/6250] eta: 0:15:40 lr: 0.000114 grad: 0.1728 (0.2317) loss: 0.7251 (0.7331) time: 0.1650 data: 0.0722 max mem: 9377 +Train: [23] [ 800/6250] eta: 0:15:23 lr: 0.000114 grad: 0.1497 (0.2232) loss: 0.7329 (0.7330) time: 0.1646 data: 0.0628 max mem: 9377 +Train: [23] [ 900/6250] eta: 0:15:03 lr: 0.000114 grad: 0.1639 (0.2162) loss: 0.7099 (0.7323) time: 0.1438 data: 0.0461 max mem: 9377 +Train: [23] [1000/6250] eta: 0:14:45 lr: 0.000114 grad: 0.1581 (0.2102) loss: 0.7142 (0.7316) time: 0.1585 data: 0.0766 max mem: 9377 +Train: [23] [1100/6250] eta: 0:14:28 lr: 0.000114 grad: 0.1398 (0.2047) loss: 0.7112 (0.7308) time: 0.1858 data: 0.1004 max mem: 9377 +Train: [23] [1200/6250] eta: 0:14:11 lr: 0.000114 grad: 0.1356 (0.1996) loss: 0.7343 (0.7305) time: 0.1762 data: 0.0971 max mem: 9377 +Train: [23] [1300/6250] eta: 0:13:51 lr: 0.000114 grad: 0.1418 (0.1957) loss: 0.7225 (0.7306) time: 0.1735 data: 0.0841 max mem: 9377 +Train: [23] [1400/6250] eta: 0:13:36 lr: 0.000114 grad: 0.1516 (0.1925) loss: 0.7148 (0.7303) time: 0.1568 data: 0.0636 max mem: 9377 +Train: [23] [1500/6250] eta: 0:13:16 lr: 0.000114 grad: 0.1571 (0.1901) loss: 0.7170 (0.7300) time: 0.1445 data: 0.0547 max mem: 9377 +Train: [23] [1600/6250] eta: 0:12:59 lr: 0.000114 grad: 0.1445 (0.1876) loss: 0.7373 (0.7300) time: 0.1874 data: 0.0920 max mem: 9377 +Train: [23] [1700/6250] eta: 0:12:46 lr: 0.000114 grad: 0.1553 (0.1859) loss: 0.7261 (0.7299) time: 0.1624 data: 0.0698 max mem: 9377 +Train: [23] [1800/6250] eta: 0:12:31 lr: 0.000114 grad: 0.1487 (0.1840) loss: 0.7267 (0.7298) time: 0.1717 data: 0.0879 max mem: 9377 +Train: [23] [1900/6250] eta: 0:12:15 lr: 0.000114 grad: 0.1384 (0.1821) loss: 0.7338 (0.7298) time: 0.1560 data: 0.0696 max mem: 9377 +Train: [23] [2000/6250] eta: 0:11:56 lr: 0.000114 grad: 0.1448 (0.1805) loss: 0.7373 (0.7298) time: 0.1582 data: 0.0639 max mem: 9377 +Train: [23] [2100/6250] eta: 0:11:40 lr: 0.000114 grad: 0.1572 (0.1791) loss: 0.7224 (0.7296) time: 0.1611 data: 0.0708 max mem: 9377 +Train: [23] [2200/6250] eta: 0:11:22 lr: 0.000114 grad: 0.1435 (0.1782) loss: 0.7125 (0.7297) time: 0.1643 data: 0.0627 max mem: 9377 +Train: [23] [2300/6250] eta: 0:11:02 lr: 0.000114 grad: 0.1402 (0.1767) loss: 0.7384 (0.7298) time: 0.1323 data: 0.0382 max mem: 9377 +Train: [23] [2400/6250] eta: 0:10:44 lr: 0.000114 grad: 0.1510 (0.1756) loss: 0.7077 (0.7298) time: 0.1720 data: 0.0841 max mem: 9377 +Train: [23] [2500/6250] eta: 0:10:26 lr: 0.000114 grad: 0.1472 (0.1745) loss: 0.7247 (0.7297) time: 0.1811 data: 0.0963 max mem: 9377 +Train: [23] [2600/6250] eta: 0:10:07 lr: 0.000114 grad: 0.1492 (0.1736) loss: 0.7274 (0.7297) time: 0.1506 data: 0.0581 max mem: 9377 +Train: [23] [2700/6250] eta: 0:09:49 lr: 0.000114 grad: 0.1372 (0.1725) loss: 0.7410 (0.7297) time: 0.1414 data: 0.0418 max mem: 9377 +Train: [23] [2800/6250] eta: 0:09:31 lr: 0.000114 grad: 0.1369 (0.1714) loss: 0.7210 (0.7294) time: 0.1610 data: 0.0746 max mem: 9377 +Train: [23] [2900/6250] eta: 0:09:13 lr: 0.000114 grad: 0.1519 (0.1706) loss: 0.7114 (0.7291) time: 0.1546 data: 0.0639 max mem: 9377 +Train: [23] [3000/6250] eta: 0:08:55 lr: 0.000114 grad: 0.1436 (0.1699) loss: 0.7331 (0.7290) time: 0.1430 data: 0.0540 max mem: 9377 +Train: [23] [3100/6250] eta: 0:08:37 lr: 0.000114 grad: 0.1347 (0.1692) loss: 0.7196 (0.7287) time: 0.1409 data: 0.0534 max mem: 9377 +Train: [23] [3200/6250] eta: 0:08:20 lr: 0.000114 grad: 0.1381 (0.1684) loss: 0.7345 (0.7286) time: 0.1402 data: 0.0509 max mem: 9377 +Train: [23] [3300/6250] eta: 0:08:03 lr: 0.000114 grad: 0.1486 (0.1677) loss: 0.7185 (0.7285) time: 0.1240 data: 0.0386 max mem: 9377 +Train: [23] [3400/6250] eta: 0:07:45 lr: 0.000114 grad: 0.1411 (0.1671) loss: 0.7390 (0.7286) time: 0.1493 data: 0.0556 max mem: 9377 +Train: [23] [3500/6250] eta: 0:07:28 lr: 0.000114 grad: 0.1408 (0.1665) loss: 0.7322 (0.7287) time: 0.1327 data: 0.0413 max mem: 9377 +Train: [23] [3600/6250] eta: 0:07:11 lr: 0.000114 grad: 0.1519 (0.1659) loss: 0.7385 (0.7289) time: 0.1534 data: 0.0734 max mem: 9377 +Train: [23] [3700/6250] eta: 0:06:54 lr: 0.000114 grad: 0.1396 (0.1655) loss: 0.7341 (0.7291) time: 0.1807 data: 0.0918 max mem: 9377 +Train: [23] [3800/6250] eta: 0:06:36 lr: 0.000114 grad: 0.1437 (0.1649) loss: 0.7403 (0.7292) time: 0.1587 data: 0.0698 max mem: 9377 +Train: [23] [3900/6250] eta: 0:06:20 lr: 0.000114 grad: 0.1390 (0.1644) loss: 0.7343 (0.7294) time: 0.1501 data: 0.0624 max mem: 9377 +Train: [23] [4000/6250] eta: 0:06:03 lr: 0.000113 grad: 0.1544 (0.1641) loss: 0.7313 (0.7296) time: 0.1500 data: 0.0645 max mem: 9377 +Train: [23] [4100/6250] eta: 0:05:46 lr: 0.000113 grad: 0.1398 (0.1636) loss: 0.7296 (0.7297) time: 0.1432 data: 0.0561 max mem: 9377 +Train: [23] [4200/6250] eta: 0:05:30 lr: 0.000113 grad: 0.1435 (0.1632) loss: 0.7302 (0.7297) time: 0.0914 data: 0.0010 max mem: 9377 +Train: [23] [4300/6250] eta: 0:05:13 lr: 0.000113 grad: 0.1492 (0.1629) loss: 0.7220 (0.7296) time: 0.1501 data: 0.0611 max mem: 9377 +Train: [23] [4400/6250] eta: 0:04:56 lr: 0.000113 grad: 0.1320 (0.1625) loss: 0.7310 (0.7297) time: 0.1500 data: 0.0643 max mem: 9377 +Train: [23] [4500/6250] eta: 0:04:40 lr: 0.000113 grad: 0.1537 (0.1623) loss: 0.7351 (0.7297) time: 0.1296 data: 0.0453 max mem: 9377 +Train: [23] [4600/6250] eta: 0:04:24 lr: 0.000113 grad: 0.1500 (0.1619) loss: 0.7215 (0.7298) time: 0.1432 data: 0.0571 max mem: 9377 +Train: [23] [4700/6250] eta: 0:04:08 lr: 0.000113 grad: 0.1372 (0.1616) loss: 0.7339 (0.7297) time: 0.1468 data: 0.0677 max mem: 9377 +Train: [23] [4800/6250] eta: 0:03:52 lr: 0.000113 grad: 0.1476 (0.1614) loss: 0.7288 (0.7297) time: 0.1573 data: 0.0715 max mem: 9377 +Train: [23] [4900/6250] eta: 0:03:35 lr: 0.000113 grad: 0.1460 (0.1611) loss: 0.7280 (0.7297) time: 0.1386 data: 0.0494 max mem: 9377 +Train: [23] [5000/6250] eta: 0:03:19 lr: 0.000113 grad: 0.1432 (0.1608) loss: 0.7334 (0.7297) time: 0.1607 data: 0.0807 max mem: 9377 +Train: [23] [5100/6250] eta: 0:03:03 lr: 0.000113 grad: 0.1502 (0.1606) loss: 0.7270 (0.7297) time: 0.1397 data: 0.0517 max mem: 9377 +Train: [23] [5200/6250] eta: 0:02:47 lr: 0.000113 grad: 0.1585 (0.1603) loss: 0.7083 (0.7296) time: 0.1423 data: 0.0484 max mem: 9377 +Train: [23] [5300/6250] eta: 0:02:31 lr: 0.000113 grad: 0.1444 (0.1600) loss: 0.7180 (0.7297) time: 0.1556 data: 0.0679 max mem: 9377 +Train: [23] [5400/6250] eta: 0:02:15 lr: 0.000113 grad: 0.1417 (0.1598) loss: 0.7349 (0.7297) time: 0.1179 data: 0.0241 max mem: 9377 +Train: [23] [5500/6250] eta: 0:01:59 lr: 0.000113 grad: 0.1405 (0.1595) loss: 0.7304 (0.7297) time: 0.1358 data: 0.0316 max mem: 9377 +Train: [23] [5600/6250] eta: 0:01:43 lr: 0.000113 grad: 0.1396 (0.1594) loss: 0.7299 (0.7296) time: 0.1736 data: 0.0828 max mem: 9377 +Train: [23] [5700/6250] eta: 0:01:27 lr: 0.000113 grad: 0.1375 (0.1591) loss: 0.7375 (0.7296) time: 0.1754 data: 0.0872 max mem: 9377 +Train: [23] [5800/6250] eta: 0:01:11 lr: 0.000113 grad: 0.1378 (0.1590) loss: 0.7162 (0.7294) time: 0.1041 data: 0.0003 max mem: 9377 +Train: [23] [5900/6250] eta: 0:00:55 lr: 0.000113 grad: 0.1545 (0.1589) loss: 0.7145 (0.7293) time: 0.1478 data: 0.0575 max mem: 9377 +Train: [23] [6000/6250] eta: 0:00:39 lr: 0.000113 grad: 0.1481 (0.1588) loss: 0.7194 (0.7291) time: 0.1156 data: 0.0247 max mem: 9377 +Train: [23] [6100/6250] eta: 0:00:23 lr: 0.000113 grad: 0.1565 (0.1587) loss: 0.7135 (0.7290) time: 0.1496 data: 0.0590 max mem: 9377 +Train: [23] [6200/6250] eta: 0:00:07 lr: 0.000113 grad: 0.1529 (0.1585) loss: 0.7089 (0.7288) time: 0.1420 data: 0.0591 max mem: 9377 +Train: [23] [6249/6250] eta: 0:00:00 lr: 0.000113 grad: 0.1482 (0.1584) loss: 0.7254 (0.7287) time: 0.1368 data: 0.0444 max mem: 9377 +Train: [23] Total time: 0:16:35 (0.1594 s / it) +Averaged stats: lr: 0.000113 grad: 0.1482 (0.1584) loss: 0.7254 (0.7287) +Eval (hcp-train-subset): [23] [ 0/62] eta: 0:05:02 loss: 0.8663 (0.8663) time: 4.8736 data: 4.8428 max mem: 9377 +Eval (hcp-train-subset): [23] [61/62] eta: 0:00:00 loss: 0.8725 (0.8749) time: 0.1417 data: 0.1165 max mem: 9377 +Eval (hcp-train-subset): [23] Total time: 0:00:14 (0.2322 s / it) +Averaged stats (hcp-train-subset): loss: 0.8725 (0.8749) +Eval (hcp-val): [23] [ 0/62] eta: 0:05:56 loss: 0.8748 (0.8748) time: 5.7450 data: 5.7152 max mem: 9377 +Eval (hcp-val): [23] [61/62] eta: 0:00:00 loss: 0.8707 (0.8741) time: 0.1032 data: 0.0781 max mem: 9377 +Eval (hcp-val): [23] Total time: 0:00:14 (0.2351 s / it) +Averaged stats (hcp-val): loss: 0.8707 (0.8741) +Eval (nsd-val): [23] [ 0/62] eta: 0:03:54 loss: 0.8502 (0.8502) time: 3.7900 data: 3.6929 max mem: 9377 +Eval (nsd-val): [23] [61/62] eta: 0:00:00 loss: 0.8565 (0.8598) time: 0.1389 data: 0.1133 max mem: 9377 +Eval (nsd-val): [23] Total time: 0:00:14 (0.2287 s / it) +Averaged stats (nsd-val): loss: 0.8565 (0.8598) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [24] [ 0/6250] eta: 12:24:26 lr: 0.000113 grad: 0.2537 (0.2537) loss: 0.7442 (0.7442) time: 7.1466 data: 7.0377 max mem: 9377 +Train: [24] [ 100/6250] eta: 0:25:14 lr: 0.000113 grad: 0.2411 (0.2476) loss: 0.7484 (0.7735) time: 0.2439 data: 0.1227 max mem: 9377 +Train: [24] [ 200/6250] eta: 0:20:56 lr: 0.000113 grad: 0.3107 (0.2803) loss: 0.7386 (0.7549) time: 0.1565 data: 0.0669 max mem: 9377 +Train: [24] [ 300/6250] eta: 0:19:56 lr: 0.000113 grad: 0.2321 (0.2756) loss: 0.7320 (0.7450) time: 0.1801 data: 0.0874 max mem: 9377 +Train: [24] [ 400/6250] eta: 0:18:46 lr: 0.000113 grad: 0.1820 (0.2603) loss: 0.7344 (0.7398) time: 0.1611 data: 0.0688 max mem: 9377 +Train: [24] [ 500/6250] eta: 0:18:23 lr: 0.000113 grad: 0.1922 (0.2499) loss: 0.7224 (0.7369) time: 0.2081 data: 0.1063 max mem: 9377 +Train: [24] [ 600/6250] eta: 0:17:55 lr: 0.000113 grad: 0.1594 (0.2380) loss: 0.7265 (0.7335) time: 0.1620 data: 0.0695 max mem: 9377 +Train: [24] [ 700/6250] eta: 0:17:29 lr: 0.000113 grad: 0.1549 (0.2268) loss: 0.7115 (0.7311) time: 0.1875 data: 0.0744 max mem: 9377 +Train: [24] [ 800/6250] eta: 0:17:03 lr: 0.000113 grad: 0.1426 (0.2173) loss: 0.7259 (0.7301) time: 0.1656 data: 0.0730 max mem: 9377 +Train: [24] [ 900/6250] eta: 0:16:28 lr: 0.000113 grad: 0.1483 (0.2094) loss: 0.7236 (0.7288) time: 0.1595 data: 0.0591 max mem: 9377 +Train: [24] [1000/6250] eta: 0:15:54 lr: 0.000113 grad: 0.1407 (0.2030) loss: 0.7125 (0.7281) time: 0.1547 data: 0.0569 max mem: 9377 +Train: [24] [1100/6250] eta: 0:15:34 lr: 0.000113 grad: 0.1397 (0.1977) loss: 0.7072 (0.7273) time: 0.1526 data: 0.0653 max mem: 9377 +Train: [24] [1200/6250] eta: 0:15:11 lr: 0.000113 grad: 0.1419 (0.1934) loss: 0.7238 (0.7266) time: 0.1766 data: 0.0849 max mem: 9377 +Train: [24] [1300/6250] eta: 0:14:48 lr: 0.000113 grad: 0.1404 (0.1895) loss: 0.7175 (0.7264) time: 0.1454 data: 0.0628 max mem: 9377 +Train: [24] [1400/6250] eta: 0:14:24 lr: 0.000113 grad: 0.1349 (0.1863) loss: 0.7287 (0.7259) time: 0.1726 data: 0.0834 max mem: 9377 +Train: [24] [1500/6250] eta: 0:14:03 lr: 0.000113 grad: 0.1489 (0.1837) loss: 0.7125 (0.7255) time: 0.1596 data: 0.0664 max mem: 9377 +Train: [24] [1600/6250] eta: 0:13:43 lr: 0.000113 grad: 0.1435 (0.1815) loss: 0.7248 (0.7253) time: 0.1890 data: 0.1011 max mem: 9377 +Train: [24] [1700/6250] eta: 0:13:23 lr: 0.000113 grad: 0.1448 (0.1795) loss: 0.7271 (0.7252) time: 0.1333 data: 0.0425 max mem: 9377 +Train: [24] [1800/6250] eta: 0:13:06 lr: 0.000113 grad: 0.1453 (0.1776) loss: 0.7296 (0.7250) time: 0.1560 data: 0.0538 max mem: 9377 +Train: [24] [1900/6250] eta: 0:12:47 lr: 0.000113 grad: 0.1478 (0.1757) loss: 0.7277 (0.7251) time: 0.1891 data: 0.1005 max mem: 9377 +Train: [24] [2000/6250] eta: 0:12:26 lr: 0.000113 grad: 0.1434 (0.1743) loss: 0.7108 (0.7248) time: 0.1787 data: 0.0790 max mem: 9377 +Train: [24] [2100/6250] eta: 0:12:07 lr: 0.000113 grad: 0.1469 (0.1730) loss: 0.7095 (0.7242) time: 0.1610 data: 0.0713 max mem: 9377 +Train: [24] [2200/6250] eta: 0:11:48 lr: 0.000113 grad: 0.1498 (0.1719) loss: 0.7163 (0.7239) time: 0.1660 data: 0.0729 max mem: 9377 +Train: [24] [2300/6250] eta: 0:11:29 lr: 0.000113 grad: 0.1516 (0.1711) loss: 0.7210 (0.7233) time: 0.1601 data: 0.0674 max mem: 9377 +Train: [24] [2400/6250] eta: 0:11:08 lr: 0.000113 grad: 0.1386 (0.1703) loss: 0.7152 (0.7232) time: 0.1360 data: 0.0288 max mem: 9377 +Train: [24] [2500/6250] eta: 0:10:47 lr: 0.000113 grad: 0.1499 (0.1695) loss: 0.7085 (0.7228) time: 0.1520 data: 0.0639 max mem: 9377 +Train: [24] [2600/6250] eta: 0:10:28 lr: 0.000113 grad: 0.1458 (0.1687) loss: 0.7178 (0.7226) time: 0.1629 data: 0.0707 max mem: 9377 +Train: [24] [2700/6250] eta: 0:10:09 lr: 0.000113 grad: 0.1429 (0.1678) loss: 0.7209 (0.7223) time: 0.1664 data: 0.0760 max mem: 9377 +Train: [24] [2800/6250] eta: 0:09:49 lr: 0.000113 grad: 0.1478 (0.1671) loss: 0.7088 (0.7221) time: 0.1577 data: 0.0676 max mem: 9377 +Train: [24] [2900/6250] eta: 0:09:31 lr: 0.000112 grad: 0.1445 (0.1665) loss: 0.7140 (0.7219) time: 0.1618 data: 0.0702 max mem: 9377 +Train: [24] [3000/6250] eta: 0:09:13 lr: 0.000112 grad: 0.1458 (0.1658) loss: 0.7059 (0.7218) time: 0.1654 data: 0.0796 max mem: 9377 +Train: [24] [3100/6250] eta: 0:08:54 lr: 0.000112 grad: 0.1392 (0.1651) loss: 0.7227 (0.7216) time: 0.1747 data: 0.0850 max mem: 9377 +Train: [24] [3200/6250] eta: 0:08:36 lr: 0.000112 grad: 0.1419 (0.1646) loss: 0.7235 (0.7214) time: 0.1294 data: 0.0367 max mem: 9377 +Train: [24] [3300/6250] eta: 0:08:18 lr: 0.000112 grad: 0.1481 (0.1642) loss: 0.7110 (0.7214) time: 0.1605 data: 0.0801 max mem: 9377 +Train: [24] [3400/6250] eta: 0:07:59 lr: 0.000112 grad: 0.1440 (0.1636) loss: 0.7140 (0.7213) time: 0.1588 data: 0.0700 max mem: 9377 +Train: [24] [3500/6250] eta: 0:07:42 lr: 0.000112 grad: 0.1424 (0.1632) loss: 0.7054 (0.7211) time: 0.1548 data: 0.0680 max mem: 9377 +Train: [24] [3600/6250] eta: 0:07:24 lr: 0.000112 grad: 0.1476 (0.1630) loss: 0.7105 (0.7208) time: 0.1500 data: 0.0546 max mem: 9377 +Train: [24] [3700/6250] eta: 0:07:06 lr: 0.000112 grad: 0.1451 (0.1627) loss: 0.7261 (0.7206) time: 0.1533 data: 0.0651 max mem: 9377 +Train: [24] [3800/6250] eta: 0:06:49 lr: 0.000112 grad: 0.1489 (0.1623) loss: 0.7239 (0.7205) time: 0.1644 data: 0.0727 max mem: 9377 +Train: [24] [3900/6250] eta: 0:06:32 lr: 0.000112 grad: 0.1405 (0.1621) loss: 0.7257 (0.7205) time: 0.2125 data: 0.1230 max mem: 9377 +Train: [24] [4000/6250] eta: 0:06:14 lr: 0.000112 grad: 0.1522 (0.1619) loss: 0.7090 (0.7205) time: 0.1559 data: 0.0665 max mem: 9377 +Train: [24] [4100/6250] eta: 0:05:56 lr: 0.000112 grad: 0.1425 (0.1617) loss: 0.7234 (0.7204) time: 0.1567 data: 0.0771 max mem: 9377 +Train: [24] [4200/6250] eta: 0:05:39 lr: 0.000112 grad: 0.1462 (0.1614) loss: 0.7271 (0.7204) time: 0.1377 data: 0.0517 max mem: 9377 +Train: [24] [4300/6250] eta: 0:05:22 lr: 0.000112 grad: 0.1430 (0.1611) loss: 0.7301 (0.7203) time: 0.1807 data: 0.0934 max mem: 9377 +Train: [24] [4400/6250] eta: 0:05:05 lr: 0.000112 grad: 0.1488 (0.1609) loss: 0.7147 (0.7202) time: 0.1622 data: 0.0810 max mem: 9377 +Train: [24] [4500/6250] eta: 0:04:48 lr: 0.000112 grad: 0.1532 (0.1608) loss: 0.7113 (0.7201) time: 0.1560 data: 0.0684 max mem: 9377 +Train: [24] [4600/6250] eta: 0:04:32 lr: 0.000112 grad: 0.1532 (0.1606) loss: 0.7104 (0.7199) time: 0.1847 data: 0.1047 max mem: 9377 +Train: [24] [4700/6250] eta: 0:04:15 lr: 0.000112 grad: 0.1583 (0.1605) loss: 0.7015 (0.7198) time: 0.1630 data: 0.0777 max mem: 9377 +Train: [24] [4800/6250] eta: 0:03:58 lr: 0.000112 grad: 0.1453 (0.1603) loss: 0.7161 (0.7197) time: 0.1664 data: 0.0897 max mem: 9377 +Train: [24] [4900/6250] eta: 0:03:42 lr: 0.000112 grad: 0.1572 (0.1602) loss: 0.7195 (0.7196) time: 0.1745 data: 0.0865 max mem: 9377 +Train: [24] [5000/6250] eta: 0:03:25 lr: 0.000112 grad: 0.1513 (0.1600) loss: 0.7232 (0.7196) time: 0.1645 data: 0.0747 max mem: 9377 +Train: [24] [5100/6250] eta: 0:03:09 lr: 0.000112 grad: 0.1492 (0.1598) loss: 0.7059 (0.7195) time: 0.1594 data: 0.0769 max mem: 9377 +Train: [24] [5200/6250] eta: 0:02:52 lr: 0.000112 grad: 0.1480 (0.1597) loss: 0.7030 (0.7194) time: 0.1404 data: 0.0457 max mem: 9377 +Train: [24] [5300/6250] eta: 0:02:35 lr: 0.000112 grad: 0.1497 (0.1596) loss: 0.7282 (0.7193) time: 0.1536 data: 0.0671 max mem: 9377 +Train: [24] [5400/6250] eta: 0:02:19 lr: 0.000112 grad: 0.1450 (0.1597) loss: 0.7181 (0.7192) time: 0.1381 data: 0.0450 max mem: 9377 +Train: [24] [5500/6250] eta: 0:02:02 lr: 0.000112 grad: 0.1557 (0.1596) loss: 0.7011 (0.7191) time: 0.1462 data: 0.0525 max mem: 9377 +Train: [24] [5600/6250] eta: 0:01:46 lr: 0.000112 grad: 0.1489 (0.1594) loss: 0.7101 (0.7189) time: 0.1542 data: 0.0651 max mem: 9377 +Train: [24] [5700/6250] eta: 0:01:29 lr: 0.000112 grad: 0.1444 (0.1592) loss: 0.7082 (0.7188) time: 0.1342 data: 0.0522 max mem: 9377 +Train: [24] [5800/6250] eta: 0:01:13 lr: 0.000112 grad: 0.1461 (0.1592) loss: 0.7172 (0.7186) time: 0.1639 data: 0.0772 max mem: 9377 +Train: [24] [5900/6250] eta: 0:00:57 lr: 0.000112 grad: 0.1399 (0.1591) loss: 0.7207 (0.7186) time: 0.1579 data: 0.0765 max mem: 9377 +Train: [24] [6000/6250] eta: 0:00:40 lr: 0.000112 grad: 0.1488 (0.1590) loss: 0.7100 (0.7185) time: 0.1846 data: 0.0967 max mem: 9377 +Train: [24] [6100/6250] eta: 0:00:24 lr: 0.000112 grad: 0.1470 (0.1588) loss: 0.7197 (0.7185) time: 0.1699 data: 0.0824 max mem: 9377 +Train: [24] [6200/6250] eta: 0:00:08 lr: 0.000112 grad: 0.1471 (0.1588) loss: 0.7138 (0.7184) time: 0.1804 data: 0.0950 max mem: 9377 +Train: [24] [6249/6250] eta: 0:00:00 lr: 0.000112 grad: 0.1529 (0.1587) loss: 0.7097 (0.7185) time: 0.1520 data: 0.0683 max mem: 9377 +Train: [24] Total time: 0:17:06 (0.1643 s / it) +Averaged stats: lr: 0.000112 grad: 0.1529 (0.1587) loss: 0.7097 (0.7185) +Eval (hcp-train-subset): [24] [ 0/62] eta: 0:04:02 loss: 0.8651 (0.8651) time: 3.9159 data: 3.8672 max mem: 9377 +Eval (hcp-train-subset): [24] [61/62] eta: 0:00:00 loss: 0.8735 (0.8756) time: 0.1542 data: 0.1273 max mem: 9377 +Eval (hcp-train-subset): [24] Total time: 0:00:15 (0.2444 s / it) +Averaged stats (hcp-train-subset): loss: 0.8735 (0.8756) +Making plots (hcp-train-subset): example=55 +Eval (hcp-val): [24] [ 0/62] eta: 0:05:05 loss: 0.8847 (0.8847) time: 4.9273 data: 4.8965 max mem: 9377 +Eval (hcp-val): [24] [61/62] eta: 0:00:00 loss: 0.8722 (0.8747) time: 0.1202 data: 0.0951 max mem: 9377 +Eval (hcp-val): [24] Total time: 0:00:15 (0.2431 s / it) +Averaged stats (hcp-val): loss: 0.8722 (0.8747) +Making plots (hcp-val): example=39 +Eval (nsd-val): [24] [ 0/62] eta: 0:05:41 loss: 0.8457 (0.8457) time: 5.5058 data: 5.4727 max mem: 9377 +Eval (nsd-val): [24] [61/62] eta: 0:00:00 loss: 0.8572 (0.8606) time: 0.1600 data: 0.1342 max mem: 9377 +Eval (nsd-val): [24] Total time: 0:00:15 (0.2460 s / it) +Averaged stats (nsd-val): loss: 0.8572 (0.8606) +Making plots (nsd-val): example=23 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-00024.pth +Train: [25] [ 0/6250] eta: 12:00:39 lr: 0.000112 grad: 0.2712 (0.2712) loss: 0.6940 (0.6940) time: 6.9183 data: 6.8178 max mem: 9377 +Train: [25] [ 100/6250] eta: 0:24:41 lr: 0.000112 grad: 0.2565 (0.2964) loss: 0.7319 (0.7456) time: 0.1572 data: 0.0697 max mem: 9377 +Train: [25] [ 200/6250] eta: 0:21:07 lr: 0.000112 grad: 0.2381 (0.2951) loss: 0.7268 (0.7394) time: 0.1659 data: 0.0780 max mem: 9377 +Train: [25] [ 300/6250] eta: 0:19:39 lr: 0.000112 grad: 0.2417 (0.2808) loss: 0.7409 (0.7365) time: 0.1636 data: 0.0789 max mem: 9377 +Train: [25] [ 400/6250] eta: 0:18:33 lr: 0.000112 grad: 0.1741 (0.2599) loss: 0.7229 (0.7348) time: 0.1543 data: 0.0754 max mem: 9377 +Train: [25] [ 500/6250] eta: 0:18:09 lr: 0.000112 grad: 0.1600 (0.2432) loss: 0.7299 (0.7339) time: 0.1767 data: 0.0779 max mem: 9377 +Train: [25] [ 600/6250] eta: 0:17:36 lr: 0.000112 grad: 0.1596 (0.2303) loss: 0.7187 (0.7333) time: 0.1535 data: 0.0516 max mem: 9377 +Train: [25] [ 700/6250] eta: 0:17:11 lr: 0.000112 grad: 0.1533 (0.2211) loss: 0.7334 (0.7325) time: 0.1632 data: 0.0689 max mem: 9377 +Train: [25] [ 800/6250] eta: 0:16:42 lr: 0.000112 grad: 0.1577 (0.2140) loss: 0.7409 (0.7319) time: 0.1878 data: 0.0914 max mem: 9377 +Train: [25] [ 900/6250] eta: 0:16:17 lr: 0.000112 grad: 0.1583 (0.2087) loss: 0.7164 (0.7311) time: 0.1659 data: 0.0598 max mem: 9377 +Train: [25] [1000/6250] eta: 0:16:05 lr: 0.000112 grad: 0.1882 (0.2055) loss: 0.7278 (0.7308) time: 0.2210 data: 0.1371 max mem: 9377 +Train: [25] [1100/6250] eta: 0:15:43 lr: 0.000112 grad: 0.1631 (0.2032) loss: 0.7194 (0.7305) time: 0.1989 data: 0.0993 max mem: 9377 +Train: [25] [1200/6250] eta: 0:15:26 lr: 0.000112 grad: 0.1404 (0.1982) loss: 0.7362 (0.7311) time: 0.1651 data: 0.0632 max mem: 9377 +Train: [25] [1300/6250] eta: 0:15:02 lr: 0.000112 grad: 0.1452 (0.1941) loss: 0.7292 (0.7311) time: 0.1464 data: 0.0658 max mem: 9377 +Train: [25] [1400/6250] eta: 0:14:40 lr: 0.000112 grad: 0.1433 (0.1910) loss: 0.7337 (0.7310) time: 0.1750 data: 0.0854 max mem: 9377 +Train: [25] [1500/6250] eta: 0:14:17 lr: 0.000112 grad: 0.1447 (0.1880) loss: 0.7172 (0.7311) time: 0.1766 data: 0.0848 max mem: 9377 +Train: [25] [1600/6250] eta: 0:13:55 lr: 0.000111 grad: 0.1505 (0.1859) loss: 0.7326 (0.7309) time: 0.1605 data: 0.0660 max mem: 9377 +Train: [25] [1700/6250] eta: 0:13:39 lr: 0.000111 grad: 0.1510 (0.1838) loss: 0.7304 (0.7309) time: 0.1560 data: 0.0666 max mem: 9377 +Train: [25] [1800/6250] eta: 0:13:22 lr: 0.000111 grad: 0.1606 (0.1823) loss: 0.7290 (0.7310) time: 0.1893 data: 0.0924 max mem: 9377 +Train: [25] [1900/6250] eta: 0:13:03 lr: 0.000111 grad: 0.1415 (0.1807) loss: 0.7322 (0.7310) time: 0.1691 data: 0.0746 max mem: 9377 +Train: [25] [2000/6250] eta: 0:12:41 lr: 0.000111 grad: 0.1444 (0.1789) loss: 0.7340 (0.7310) time: 0.1767 data: 0.0923 max mem: 9377 +Train: [25] [2100/6250] eta: 0:12:20 lr: 0.000111 grad: 0.1421 (0.1773) loss: 0.7239 (0.7311) time: 0.1813 data: 0.0866 max mem: 9377 +Train: [25] [2200/6250] eta: 0:12:02 lr: 0.000111 grad: 0.1449 (0.1761) loss: 0.7297 (0.7311) time: 0.1645 data: 0.0679 max mem: 9377 +Train: [25] [2300/6250] eta: 0:11:43 lr: 0.000111 grad: 0.1503 (0.1749) loss: 0.7349 (0.7312) time: 0.1578 data: 0.0514 max mem: 9377 +Train: [25] [2400/6250] eta: 0:11:22 lr: 0.000111 grad: 0.1502 (0.1742) loss: 0.7344 (0.7313) time: 0.1611 data: 0.0633 max mem: 9377 +Train: [25] [2500/6250] eta: 0:11:01 lr: 0.000111 grad: 0.1466 (0.1733) loss: 0.7334 (0.7313) time: 0.1276 data: 0.0310 max mem: 9377 +Train: [25] [2600/6250] eta: 0:10:40 lr: 0.000111 grad: 0.1510 (0.1723) loss: 0.7224 (0.7313) time: 0.1645 data: 0.0739 max mem: 9377 +Train: [25] [2700/6250] eta: 0:10:21 lr: 0.000111 grad: 0.1471 (0.1714) loss: 0.7423 (0.7312) time: 0.1739 data: 0.0854 max mem: 9377 +Train: [25] [2800/6250] eta: 0:10:02 lr: 0.000111 grad: 0.1447 (0.1704) loss: 0.7336 (0.7314) time: 0.1460 data: 0.0678 max mem: 9377 +Train: [25] [2900/6250] eta: 0:09:43 lr: 0.000111 grad: 0.1419 (0.1694) loss: 0.7441 (0.7318) time: 0.1607 data: 0.0730 max mem: 9377 +Train: [25] [3000/6250] eta: 0:09:26 lr: 0.000111 grad: 0.1426 (0.1685) loss: 0.7385 (0.7321) time: 0.1319 data: 0.0292 max mem: 9377 +Train: [25] [3100/6250] eta: 0:09:08 lr: 0.000111 grad: 0.1345 (0.1676) loss: 0.7427 (0.7323) time: 0.1650 data: 0.0767 max mem: 9377 +Train: [25] [3200/6250] eta: 0:08:50 lr: 0.000111 grad: 0.1338 (0.1669) loss: 0.7445 (0.7324) time: 0.1386 data: 0.0587 max mem: 9377 +Train: [25] [3300/6250] eta: 0:08:32 lr: 0.000111 grad: 0.1380 (0.1661) loss: 0.7257 (0.7324) time: 0.1701 data: 0.0852 max mem: 9377 +Train: [25] [3400/6250] eta: 0:08:14 lr: 0.000111 grad: 0.1358 (0.1655) loss: 0.7453 (0.7324) time: 0.1555 data: 0.0770 max mem: 9377 +Train: [25] [3500/6250] eta: 0:07:55 lr: 0.000111 grad: 0.1501 (0.1648) loss: 0.7266 (0.7323) time: 0.1912 data: 0.1106 max mem: 9377 +Train: [25] [3600/6250] eta: 0:07:36 lr: 0.000111 grad: 0.1406 (0.1642) loss: 0.7295 (0.7322) time: 0.1609 data: 0.0747 max mem: 9377 +Train: [25] [3700/6250] eta: 0:07:17 lr: 0.000111 grad: 0.1468 (0.1638) loss: 0.7154 (0.7319) time: 0.1464 data: 0.0599 max mem: 9377 +Train: [25] [3800/6250] eta: 0:06:59 lr: 0.000111 grad: 0.1497 (0.1633) loss: 0.7215 (0.7317) time: 0.1552 data: 0.0615 max mem: 9377 +Train: [25] [3900/6250] eta: 0:06:41 lr: 0.000111 grad: 0.1435 (0.1629) loss: 0.7139 (0.7315) time: 0.1660 data: 0.0891 max mem: 9377 +Train: [25] [4000/6250] eta: 0:06:23 lr: 0.000111 grad: 0.1461 (0.1626) loss: 0.7235 (0.7311) time: 0.1422 data: 0.0657 max mem: 9377 +Train: [25] [4100/6250] eta: 0:06:06 lr: 0.000111 grad: 0.1586 (0.1624) loss: 0.7040 (0.7306) time: 0.1770 data: 0.0993 max mem: 9377 +Train: [25] [4200/6250] eta: 0:05:49 lr: 0.000111 grad: 0.1412 (0.1620) loss: 0.7107 (0.7304) time: 0.1806 data: 0.1035 max mem: 9377 +Train: [25] [4300/6250] eta: 0:05:31 lr: 0.000111 grad: 0.1506 (0.1617) loss: 0.7242 (0.7303) time: 0.1557 data: 0.0658 max mem: 9377 +Train: [25] [4400/6250] eta: 0:05:14 lr: 0.000111 grad: 0.1605 (0.1614) loss: 0.7171 (0.7301) time: 0.1925 data: 0.1087 max mem: 9377 +Train: [25] [4500/6250] eta: 0:04:57 lr: 0.000111 grad: 0.1467 (0.1610) loss: 0.7276 (0.7300) time: 0.1961 data: 0.1122 max mem: 9377 +Train: [25] [4600/6250] eta: 0:04:39 lr: 0.000111 grad: 0.1470 (0.1607) loss: 0.7369 (0.7300) time: 0.1464 data: 0.0666 max mem: 9377 +Train: [25] [4700/6250] eta: 0:04:21 lr: 0.000111 grad: 0.1391 (0.1604) loss: 0.7327 (0.7300) time: 0.1503 data: 0.0580 max mem: 9377 +Train: [25] [4800/6250] eta: 0:04:04 lr: 0.000111 grad: 0.1428 (0.1600) loss: 0.7240 (0.7300) time: 0.2123 data: 0.0520 max mem: 9377 +Train: [25] [4900/6250] eta: 0:03:47 lr: 0.000111 grad: 0.1421 (0.1597) loss: 0.7374 (0.7300) time: 0.1404 data: 0.0535 max mem: 9377 +Train: [25] [5000/6250] eta: 0:03:30 lr: 0.000111 grad: 0.1430 (0.1596) loss: 0.7354 (0.7299) time: 0.1821 data: 0.0953 max mem: 9377 +Train: [25] [5100/6250] eta: 0:03:13 lr: 0.000111 grad: 0.1439 (0.1593) loss: 0.7270 (0.7299) time: 0.1347 data: 0.0435 max mem: 9377 +Train: [25] [5200/6250] eta: 0:02:56 lr: 0.000111 grad: 0.1463 (0.1591) loss: 0.7380 (0.7297) time: 0.1672 data: 0.0932 max mem: 9377 +Train: [25] [5300/6250] eta: 0:02:39 lr: 0.000111 grad: 0.1477 (0.1589) loss: 0.7117 (0.7297) time: 0.1704 data: 0.0835 max mem: 9377 +Train: [25] [5400/6250] eta: 0:02:22 lr: 0.000111 grad: 0.1479 (0.1587) loss: 0.7118 (0.7296) time: 0.1761 data: 0.0825 max mem: 9377 +Train: [25] [5500/6250] eta: 0:02:05 lr: 0.000111 grad: 0.1448 (0.1585) loss: 0.7091 (0.7295) time: 0.1615 data: 0.0759 max mem: 9377 +Train: [25] [5600/6250] eta: 0:01:48 lr: 0.000111 grad: 0.1390 (0.1583) loss: 0.7195 (0.7294) time: 0.1755 data: 0.0864 max mem: 9377 +Train: [25] [5700/6250] eta: 0:01:31 lr: 0.000111 grad: 0.1513 (0.1582) loss: 0.7137 (0.7292) time: 0.1514 data: 0.0620 max mem: 9377 +Train: [25] [5800/6250] eta: 0:01:15 lr: 0.000111 grad: 0.1499 (0.1581) loss: 0.7195 (0.7290) time: 0.1520 data: 0.0642 max mem: 9377 +Train: [25] [5900/6250] eta: 0:00:58 lr: 0.000111 grad: 0.1513 (0.1581) loss: 0.7076 (0.7288) time: 0.1504 data: 0.0628 max mem: 9377 +Train: [25] [6000/6250] eta: 0:00:41 lr: 0.000111 grad: 0.1529 (0.1580) loss: 0.7261 (0.7286) time: 0.1602 data: 0.0709 max mem: 9377 +Train: [25] [6100/6250] eta: 0:00:25 lr: 0.000111 grad: 0.1478 (0.1579) loss: 0.7040 (0.7284) time: 0.1582 data: 0.0721 max mem: 9377 +Train: [25] [6200/6250] eta: 0:00:08 lr: 0.000111 grad: 0.1494 (0.1578) loss: 0.7186 (0.7282) time: 0.1792 data: 0.0918 max mem: 9377 +Train: [25] [6249/6250] eta: 0:00:00 lr: 0.000111 grad: 0.1481 (0.1578) loss: 0.7223 (0.7281) time: 0.1499 data: 0.0499 max mem: 9377 +Train: [25] Total time: 0:17:26 (0.1674 s / it) +Averaged stats: lr: 0.000111 grad: 0.1481 (0.1578) loss: 0.7223 (0.7281) +Eval (hcp-train-subset): [25] [ 0/62] eta: 0:04:00 loss: 0.8685 (0.8685) time: 3.8710 data: 3.8147 max mem: 9377 +Eval (hcp-train-subset): [25] [61/62] eta: 0:00:00 loss: 0.8762 (0.8761) time: 0.1511 data: 0.1257 max mem: 9377 +Eval (hcp-train-subset): [25] Total time: 0:00:14 (0.2334 s / it) +Averaged stats (hcp-train-subset): loss: 0.8762 (0.8761) +Eval (hcp-val): [25] [ 0/62] eta: 0:04:14 loss: 0.8717 (0.8717) time: 4.1047 data: 4.0254 max mem: 9377 +Eval (hcp-val): [25] [61/62] eta: 0:00:00 loss: 0.8734 (0.8755) time: 0.1539 data: 0.1272 max mem: 9377 +Eval (hcp-val): [25] Total time: 0:00:14 (0.2388 s / it) +Averaged stats (hcp-val): loss: 0.8734 (0.8755) +Eval (nsd-val): [25] [ 0/62] eta: 0:05:53 loss: 0.8458 (0.8458) time: 5.7006 data: 5.6691 max mem: 9377 +Eval (nsd-val): [25] [61/62] eta: 0:00:00 loss: 0.8560 (0.8590) time: 0.1223 data: 0.0948 max mem: 9377 +Eval (nsd-val): [25] Total time: 0:00:14 (0.2351 s / it) +Averaged stats (nsd-val): loss: 0.8560 (0.8590) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [26] [ 0/6250] eta: 12:02:00 lr: 0.000111 grad: 0.1985 (0.1985) loss: 0.6835 (0.6835) time: 6.9313 data: 6.8365 max mem: 9377 +Train: [26] [ 100/6250] eta: 0:23:39 lr: 0.000111 grad: 0.3822 (0.3231) loss: 0.7290 (0.7405) time: 0.1794 data: 0.0540 max mem: 9377 +Train: [26] [ 200/6250] eta: 0:20:23 lr: 0.000110 grad: 0.2669 (0.3387) loss: 0.7190 (0.7308) time: 0.1558 data: 0.0708 max mem: 9377 +Train: [26] [ 300/6250] eta: 0:19:21 lr: 0.000110 grad: 0.2132 (0.3016) loss: 0.7221 (0.7305) time: 0.1731 data: 0.0869 max mem: 9377 +Train: [26] [ 400/6250] eta: 0:18:19 lr: 0.000110 grad: 0.2032 (0.2745) loss: 0.7410 (0.7308) time: 0.1585 data: 0.0772 max mem: 9377 +Train: [26] [ 500/6250] eta: 0:17:42 lr: 0.000110 grad: 0.1737 (0.2563) loss: 0.7237 (0.7315) time: 0.1546 data: 0.0641 max mem: 9377 +Train: [26] [ 600/6250] eta: 0:17:12 lr: 0.000110 grad: 0.1642 (0.2412) loss: 0.7309 (0.7313) time: 0.1817 data: 0.0820 max mem: 9377 +Train: [26] [ 700/6250] eta: 0:16:41 lr: 0.000110 grad: 0.1709 (0.2352) loss: 0.7387 (0.7307) time: 0.1625 data: 0.0608 max mem: 9377 +Train: [26] [ 800/6250] eta: 0:16:11 lr: 0.000110 grad: 0.1823 (0.2290) loss: 0.7142 (0.7299) time: 0.1478 data: 0.0641 max mem: 9377 +Train: [26] [ 900/6250] eta: 0:15:44 lr: 0.000110 grad: 0.1560 (0.2228) loss: 0.7385 (0.7297) time: 0.1720 data: 0.0715 max mem: 9377 +Train: [26] [1000/6250] eta: 0:15:17 lr: 0.000110 grad: 0.1632 (0.2170) loss: 0.7298 (0.7289) time: 0.1973 data: 0.1073 max mem: 9377 +Train: [26] [1100/6250] eta: 0:14:53 lr: 0.000110 grad: 0.1731 (0.2130) loss: 0.7111 (0.7282) time: 0.1470 data: 0.0660 max mem: 9377 +Train: [26] [1200/6250] eta: 0:14:31 lr: 0.000110 grad: 0.1713 (0.2093) loss: 0.7152 (0.7277) time: 0.1821 data: 0.0852 max mem: 9377 +Train: [26] [1300/6250] eta: 0:14:11 lr: 0.000110 grad: 0.1502 (0.2057) loss: 0.7178 (0.7270) time: 0.1639 data: 0.0742 max mem: 9377 +Train: [26] [1400/6250] eta: 0:13:48 lr: 0.000110 grad: 0.1462 (0.2021) loss: 0.7213 (0.7264) time: 0.1541 data: 0.0734 max mem: 9377 +Train: [26] [1500/6250] eta: 0:13:29 lr: 0.000110 grad: 0.1531 (0.1990) loss: 0.7155 (0.7258) time: 0.1510 data: 0.0667 max mem: 9377 +Train: [26] [1600/6250] eta: 0:13:10 lr: 0.000110 grad: 0.1509 (0.1961) loss: 0.7071 (0.7253) time: 0.1513 data: 0.0530 max mem: 9377 +Train: [26] [1700/6250] eta: 0:12:50 lr: 0.000110 grad: 0.1457 (0.1935) loss: 0.7116 (0.7246) time: 0.1545 data: 0.0584 max mem: 9377 +Train: [26] [1800/6250] eta: 0:12:30 lr: 0.000110 grad: 0.1587 (0.1915) loss: 0.7236 (0.7241) time: 0.1613 data: 0.0727 max mem: 9377 +Train: [26] [1900/6250] eta: 0:12:16 lr: 0.000110 grad: 0.1460 (0.1894) loss: 0.7101 (0.7239) time: 0.1548 data: 0.0700 max mem: 9377 +Train: [26] [2000/6250] eta: 0:11:56 lr: 0.000110 grad: 0.1519 (0.1875) loss: 0.7115 (0.7236) time: 0.1442 data: 0.0625 max mem: 9377 +Train: [26] [2100/6250] eta: 0:11:38 lr: 0.000110 grad: 0.1448 (0.1858) loss: 0.7094 (0.7234) time: 0.1324 data: 0.0368 max mem: 9377 +Train: [26] [2200/6250] eta: 0:11:21 lr: 0.000110 grad: 0.1522 (0.1840) loss: 0.7052 (0.7234) time: 0.1517 data: 0.0592 max mem: 9377 +Train: [26] [2300/6250] eta: 0:11:02 lr: 0.000110 grad: 0.1495 (0.1824) loss: 0.7081 (0.7230) time: 0.1730 data: 0.0801 max mem: 9377 +Train: [26] [2400/6250] eta: 0:10:42 lr: 0.000110 grad: 0.1466 (0.1811) loss: 0.7229 (0.7227) time: 0.1702 data: 0.0781 max mem: 9377 +Train: [26] [2500/6250] eta: 0:10:23 lr: 0.000110 grad: 0.1489 (0.1800) loss: 0.7079 (0.7224) time: 0.1455 data: 0.0515 max mem: 9377 +Train: [26] [2600/6250] eta: 0:10:03 lr: 0.000110 grad: 0.1450 (0.1789) loss: 0.7093 (0.7221) time: 0.1526 data: 0.0622 max mem: 9377 +Train: [26] [2700/6250] eta: 0:09:45 lr: 0.000110 grad: 0.1435 (0.1777) loss: 0.7197 (0.7218) time: 0.1568 data: 0.0734 max mem: 9377 +Train: [26] [2800/6250] eta: 0:09:27 lr: 0.000110 grad: 0.1410 (0.1767) loss: 0.7222 (0.7215) time: 0.1485 data: 0.0625 max mem: 9377 +Train: [26] [2900/6250] eta: 0:09:10 lr: 0.000110 grad: 0.1454 (0.1757) loss: 0.7212 (0.7215) time: 0.1536 data: 0.0658 max mem: 9377 +Train: [26] [3000/6250] eta: 0:08:54 lr: 0.000110 grad: 0.1435 (0.1747) loss: 0.7301 (0.7214) time: 0.1613 data: 0.0822 max mem: 9377 +Train: [26] [3100/6250] eta: 0:08:37 lr: 0.000110 grad: 0.1553 (0.1740) loss: 0.7054 (0.7212) time: 0.1661 data: 0.0827 max mem: 9377 +Train: [26] [3200/6250] eta: 0:08:20 lr: 0.000110 grad: 0.1407 (0.1732) loss: 0.7147 (0.7209) time: 0.1690 data: 0.0882 max mem: 9377 +Train: [26] [3300/6250] eta: 0:08:03 lr: 0.000110 grad: 0.1472 (0.1724) loss: 0.6964 (0.7206) time: 0.1720 data: 0.0875 max mem: 9377 +Train: [26] [3400/6250] eta: 0:07:46 lr: 0.000110 grad: 0.1482 (0.1718) loss: 0.7140 (0.7203) time: 0.1612 data: 0.0756 max mem: 9377 +Train: [26] [3500/6250] eta: 0:07:30 lr: 0.000110 grad: 0.1425 (0.1710) loss: 0.7258 (0.7202) time: 0.1871 data: 0.1005 max mem: 9377 +Train: [26] [3600/6250] eta: 0:07:13 lr: 0.000110 grad: 0.1410 (0.1704) loss: 0.7220 (0.7202) time: 0.1611 data: 0.0728 max mem: 9377 +Train: [26] [3700/6250] eta: 0:06:57 lr: 0.000110 grad: 0.1422 (0.1697) loss: 0.7254 (0.7202) time: 0.1697 data: 0.0873 max mem: 9377 +Train: [26] [3800/6250] eta: 0:06:40 lr: 0.000110 grad: 0.1433 (0.1693) loss: 0.7064 (0.7200) time: 0.1287 data: 0.0387 max mem: 9377 +Train: [26] [3900/6250] eta: 0:06:23 lr: 0.000110 grad: 0.1442 (0.1689) loss: 0.7225 (0.7198) time: 0.1577 data: 0.0731 max mem: 9377 +Train: [26] [4000/6250] eta: 0:06:06 lr: 0.000110 grad: 0.1450 (0.1684) loss: 0.7051 (0.7197) time: 0.1474 data: 0.0541 max mem: 9377 +Train: [26] [4100/6250] eta: 0:05:49 lr: 0.000110 grad: 0.1601 (0.1680) loss: 0.6932 (0.7196) time: 0.1414 data: 0.0493 max mem: 9377 +Train: [26] [4200/6250] eta: 0:05:33 lr: 0.000110 grad: 0.1485 (0.1677) loss: 0.7020 (0.7194) time: 0.1611 data: 0.0710 max mem: 9377 +Train: [26] [4300/6250] eta: 0:05:16 lr: 0.000110 grad: 0.1385 (0.1672) loss: 0.7380 (0.7194) time: 0.1572 data: 0.0686 max mem: 9377 +Train: [26] [4400/6250] eta: 0:05:00 lr: 0.000110 grad: 0.1508 (0.1669) loss: 0.7015 (0.7193) time: 0.1566 data: 0.0608 max mem: 9377 +Train: [26] [4500/6250] eta: 0:04:43 lr: 0.000110 grad: 0.1425 (0.1665) loss: 0.7201 (0.7193) time: 0.1620 data: 0.0781 max mem: 9377 +Train: [26] [4600/6250] eta: 0:04:26 lr: 0.000110 grad: 0.1629 (0.1662) loss: 0.7060 (0.7193) time: 0.1475 data: 0.0611 max mem: 9377 +Train: [26] [4700/6250] eta: 0:04:10 lr: 0.000110 grad: 0.1494 (0.1658) loss: 0.7255 (0.7193) time: 0.1502 data: 0.0539 max mem: 9377 +Train: [26] [4800/6250] eta: 0:03:54 lr: 0.000109 grad: 0.1542 (0.1654) loss: 0.7192 (0.7193) time: 0.1729 data: 0.0953 max mem: 9377 +Train: [26] [4900/6250] eta: 0:03:38 lr: 0.000109 grad: 0.1574 (0.1651) loss: 0.7041 (0.7192) time: 0.1758 data: 0.0911 max mem: 9377 +Train: [26] [5000/6250] eta: 0:03:22 lr: 0.000109 grad: 0.1514 (0.1648) loss: 0.7151 (0.7192) time: 0.1663 data: 0.0865 max mem: 9377 +Train: [26] [5100/6250] eta: 0:03:06 lr: 0.000109 grad: 0.1455 (0.1645) loss: 0.7121 (0.7191) time: 0.1489 data: 0.0577 max mem: 9377 +Train: [26] [5200/6250] eta: 0:02:50 lr: 0.000109 grad: 0.1435 (0.1642) loss: 0.7089 (0.7190) time: 0.1643 data: 0.0801 max mem: 9377 +Train: [26] [5300/6250] eta: 0:02:34 lr: 0.000109 grad: 0.1492 (0.1640) loss: 0.7152 (0.7189) time: 0.1479 data: 0.0654 max mem: 9377 +Train: [26] [5400/6250] eta: 0:02:17 lr: 0.000109 grad: 0.1452 (0.1637) loss: 0.7226 (0.7188) time: 0.1472 data: 0.0531 max mem: 9377 +Train: [26] [5500/6250] eta: 0:02:01 lr: 0.000109 grad: 0.1469 (0.1636) loss: 0.7114 (0.7187) time: 0.1589 data: 0.0760 max mem: 9377 +Train: [26] [5600/6250] eta: 0:01:45 lr: 0.000109 grad: 0.1478 (0.1633) loss: 0.7026 (0.7186) time: 0.1763 data: 0.0872 max mem: 9377 +Train: [26] [5700/6250] eta: 0:01:29 lr: 0.000109 grad: 0.1517 (0.1631) loss: 0.7229 (0.7186) time: 0.1699 data: 0.0820 max mem: 9377 +Train: [26] [5800/6250] eta: 0:01:12 lr: 0.000109 grad: 0.1431 (0.1629) loss: 0.7107 (0.7187) time: 0.1599 data: 0.0711 max mem: 9377 +Train: [26] [5900/6250] eta: 0:00:56 lr: 0.000109 grad: 0.1653 (0.1627) loss: 0.7217 (0.7186) time: 0.1262 data: 0.0411 max mem: 9377 +Train: [26] [6000/6250] eta: 0:00:40 lr: 0.000109 grad: 0.1513 (0.1626) loss: 0.7045 (0.7185) time: 0.1740 data: 0.0956 max mem: 9377 +Train: [26] [6100/6250] eta: 0:00:24 lr: 0.000109 grad: 0.1583 (0.1625) loss: 0.6933 (0.7183) time: 0.1408 data: 0.0538 max mem: 9377 +Train: [26] [6200/6250] eta: 0:00:08 lr: 0.000109 grad: 0.1526 (0.1623) loss: 0.7122 (0.7182) time: 0.1207 data: 0.0273 max mem: 9377 +Train: [26] [6249/6250] eta: 0:00:00 lr: 0.000109 grad: 0.1644 (0.1623) loss: 0.7072 (0.7181) time: 0.1727 data: 0.0796 max mem: 9377 +Train: [26] Total time: 0:16:56 (0.1626 s / it) +Averaged stats: lr: 0.000109 grad: 0.1644 (0.1623) loss: 0.7072 (0.7181) +Eval (hcp-train-subset): [26] [ 0/62] eta: 0:05:40 loss: 0.8682 (0.8682) time: 5.4921 data: 5.4622 max mem: 9377 +Eval (hcp-train-subset): [26] [61/62] eta: 0:00:00 loss: 0.8778 (0.8784) time: 0.1443 data: 0.1191 max mem: 9377 +Eval (hcp-train-subset): [26] Total time: 0:00:14 (0.2286 s / it) +Averaged stats (hcp-train-subset): loss: 0.8778 (0.8784) +Eval (hcp-val): [26] [ 0/62] eta: 0:03:28 loss: 0.8679 (0.8679) time: 3.3555 data: 3.2632 max mem: 9377 +Eval (hcp-val): [26] [61/62] eta: 0:00:00 loss: 0.8752 (0.8777) time: 0.1287 data: 0.1033 max mem: 9377 +Eval (hcp-val): [26] Total time: 0:00:14 (0.2330 s / it) +Averaged stats (hcp-val): loss: 0.8752 (0.8777) +Eval (nsd-val): [26] [ 0/62] eta: 0:05:58 loss: 0.8575 (0.8575) time: 5.7878 data: 5.7569 max mem: 9377 +Eval (nsd-val): [26] [61/62] eta: 0:00:00 loss: 0.8628 (0.8629) time: 0.1394 data: 0.1136 max mem: 9377 +Eval (nsd-val): [26] Total time: 0:00:14 (0.2344 s / it) +Averaged stats (nsd-val): loss: 0.8628 (0.8629) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [27] [ 0/6250] eta: 9:50:04 lr: 0.000109 grad: 0.1249 (0.1249) loss: 0.8188 (0.8188) time: 5.6647 data: 5.3980 max mem: 9377 +Train: [27] [ 100/6250] eta: 0:24:07 lr: 0.000109 grad: 0.3310 (0.3558) loss: 0.7441 (0.7399) time: 0.2212 data: 0.1021 max mem: 9377 +Train: [27] [ 200/6250] eta: 0:20:28 lr: 0.000109 grad: 0.2714 (0.3116) loss: 0.7270 (0.7368) time: 0.1582 data: 0.0646 max mem: 9377 +Train: [27] [ 300/6250] eta: 0:19:20 lr: 0.000109 grad: 0.2083 (0.3046) loss: 0.7115 (0.7328) time: 0.1792 data: 0.0832 max mem: 9377 +Train: [27] [ 400/6250] eta: 0:18:04 lr: 0.000109 grad: 0.1866 (0.2779) loss: 0.7172 (0.7299) time: 0.1449 data: 0.0602 max mem: 9377 +Train: [27] [ 500/6250] eta: 0:17:42 lr: 0.000109 grad: 0.1850 (0.2641) loss: 0.7095 (0.7258) time: 0.1712 data: 0.0773 max mem: 9377 +Train: [27] [ 600/6250] eta: 0:17:14 lr: 0.000109 grad: 0.1779 (0.2533) loss: 0.7238 (0.7231) time: 0.1875 data: 0.0958 max mem: 9377 +Train: [27] [ 700/6250] eta: 0:16:39 lr: 0.000109 grad: 0.1590 (0.2439) loss: 0.7192 (0.7221) time: 0.1877 data: 0.0917 max mem: 9377 +Train: [27] [ 800/6250] eta: 0:16:08 lr: 0.000109 grad: 0.1515 (0.2339) loss: 0.7110 (0.7214) time: 0.1745 data: 0.0732 max mem: 9377 +Train: [27] [ 900/6250] eta: 0:15:38 lr: 0.000109 grad: 0.1487 (0.2254) loss: 0.7234 (0.7213) time: 0.1372 data: 0.0467 max mem: 9377 +Train: [27] [1000/6250] eta: 0:15:11 lr: 0.000109 grad: 0.1498 (0.2183) loss: 0.7150 (0.7211) time: 0.1460 data: 0.0600 max mem: 9377 +Train: [27] [1100/6250] eta: 0:14:46 lr: 0.000109 grad: 0.1497 (0.2124) loss: 0.7242 (0.7211) time: 0.1508 data: 0.0678 max mem: 9377 +Train: [27] [1200/6250] eta: 0:14:27 lr: 0.000109 grad: 0.1435 (0.2071) loss: 0.7240 (0.7211) time: 0.1647 data: 0.0719 max mem: 9377 +Train: [27] [1300/6250] eta: 0:14:07 lr: 0.000109 grad: 0.1500 (0.2028) loss: 0.7083 (0.7209) time: 0.1398 data: 0.0439 max mem: 9377 +Train: [27] [1400/6250] eta: 0:13:47 lr: 0.000109 grad: 0.1505 (0.1994) loss: 0.7062 (0.7202) time: 0.1384 data: 0.0562 max mem: 9377 +Train: [27] [1500/6250] eta: 0:13:28 lr: 0.000109 grad: 0.1449 (0.1962) loss: 0.7124 (0.7196) time: 0.1646 data: 0.0738 max mem: 9377 +Train: [27] [1600/6250] eta: 0:13:09 lr: 0.000109 grad: 0.1528 (0.1934) loss: 0.7003 (0.7192) time: 0.1669 data: 0.0644 max mem: 9377 +Train: [27] [1700/6250] eta: 0:12:50 lr: 0.000109 grad: 0.1522 (0.1909) loss: 0.7188 (0.7189) time: 0.1577 data: 0.0707 max mem: 9377 +Train: [27] [1800/6250] eta: 0:12:29 lr: 0.000109 grad: 0.1450 (0.1889) loss: 0.7150 (0.7185) time: 0.1561 data: 0.0546 max mem: 9377 +Train: [27] [1900/6250] eta: 0:12:08 lr: 0.000109 grad: 0.1550 (0.1873) loss: 0.7220 (0.7182) time: 0.1508 data: 0.0661 max mem: 9377 +Train: [27] [2000/6250] eta: 0:11:50 lr: 0.000109 grad: 0.1512 (0.1855) loss: 0.7080 (0.7180) time: 0.1585 data: 0.0759 max mem: 9377 +Train: [27] [2100/6250] eta: 0:11:32 lr: 0.000109 grad: 0.1487 (0.1839) loss: 0.7095 (0.7179) time: 0.1484 data: 0.0700 max mem: 9377 +Train: [27] [2200/6250] eta: 0:11:13 lr: 0.000109 grad: 0.1439 (0.1824) loss: 0.7136 (0.7177) time: 0.1449 data: 0.0522 max mem: 9377 +Train: [27] [2300/6250] eta: 0:10:56 lr: 0.000109 grad: 0.1472 (0.1809) loss: 0.7151 (0.7176) time: 0.1404 data: 0.0441 max mem: 9377 +Train: [27] [2400/6250] eta: 0:10:38 lr: 0.000109 grad: 0.1436 (0.1796) loss: 0.7167 (0.7174) time: 0.1496 data: 0.0478 max mem: 9377 +Train: [27] [2500/6250] eta: 0:10:19 lr: 0.000109 grad: 0.1613 (0.1786) loss: 0.6982 (0.7171) time: 0.1487 data: 0.0563 max mem: 9377 +Train: [27] [2600/6250] eta: 0:10:00 lr: 0.000109 grad: 0.1543 (0.1775) loss: 0.7017 (0.7169) time: 0.1546 data: 0.0557 max mem: 9377 +Train: [27] [2700/6250] eta: 0:09:42 lr: 0.000109 grad: 0.1562 (0.1766) loss: 0.7073 (0.7165) time: 0.1615 data: 0.0679 max mem: 9377 +Train: [27] [2800/6250] eta: 0:09:23 lr: 0.000109 grad: 0.1431 (0.1757) loss: 0.7023 (0.7162) time: 0.1417 data: 0.0473 max mem: 9377 +Train: [27] [2900/6250] eta: 0:09:07 lr: 0.000109 grad: 0.1436 (0.1749) loss: 0.7217 (0.7160) time: 0.1583 data: 0.0747 max mem: 9377 +Train: [27] [3000/6250] eta: 0:08:50 lr: 0.000109 grad: 0.1593 (0.1743) loss: 0.7016 (0.7158) time: 0.1502 data: 0.0566 max mem: 9377 +Train: [27] [3100/6250] eta: 0:08:34 lr: 0.000108 grad: 0.1553 (0.1735) loss: 0.7039 (0.7156) time: 0.1682 data: 0.0848 max mem: 9377 +Train: [27] [3200/6250] eta: 0:08:18 lr: 0.000108 grad: 0.1426 (0.1728) loss: 0.7086 (0.7155) time: 0.2110 data: 0.1234 max mem: 9377 +Train: [27] [3300/6250] eta: 0:08:01 lr: 0.000108 grad: 0.1443 (0.1720) loss: 0.7215 (0.7155) time: 0.1735 data: 0.0883 max mem: 9377 +Train: [27] [3400/6250] eta: 0:07:44 lr: 0.000108 grad: 0.1418 (0.1714) loss: 0.7053 (0.7154) time: 0.1683 data: 0.0798 max mem: 9377 +Train: [27] [3500/6250] eta: 0:07:28 lr: 0.000108 grad: 0.1446 (0.1707) loss: 0.7191 (0.7154) time: 0.1438 data: 0.0614 max mem: 9377 +Train: [27] [3600/6250] eta: 0:07:13 lr: 0.000108 grad: 0.1468 (0.1702) loss: 0.7301 (0.7154) time: 0.1674 data: 0.0839 max mem: 9377 +Train: [27] [3700/6250] eta: 0:06:56 lr: 0.000108 grad: 0.1456 (0.1697) loss: 0.7108 (0.7154) time: 0.0960 data: 0.0003 max mem: 9377 +Train: [27] [3800/6250] eta: 0:06:39 lr: 0.000108 grad: 0.1445 (0.1692) loss: 0.7127 (0.7152) time: 0.1756 data: 0.0899 max mem: 9377 +Train: [27] [3900/6250] eta: 0:06:22 lr: 0.000108 grad: 0.1461 (0.1687) loss: 0.7207 (0.7151) time: 0.1546 data: 0.0671 max mem: 9377 +Train: [27] [4000/6250] eta: 0:06:06 lr: 0.000108 grad: 0.1604 (0.1683) loss: 0.7006 (0.7150) time: 0.1712 data: 0.0881 max mem: 9377 +Train: [27] [4100/6250] eta: 0:05:49 lr: 0.000108 grad: 0.1532 (0.1679) loss: 0.7054 (0.7150) time: 0.1809 data: 0.0907 max mem: 9377 +Train: [27] [4200/6250] eta: 0:05:33 lr: 0.000108 grad: 0.1529 (0.1675) loss: 0.7085 (0.7149) time: 0.1862 data: 0.1011 max mem: 9377 +Train: [27] [4300/6250] eta: 0:05:16 lr: 0.000108 grad: 0.1571 (0.1673) loss: 0.7014 (0.7147) time: 0.1412 data: 0.0505 max mem: 9377 +Train: [27] [4400/6250] eta: 0:05:00 lr: 0.000108 grad: 0.1448 (0.1670) loss: 0.7204 (0.7147) time: 0.1558 data: 0.0666 max mem: 9377 +Train: [27] [4500/6250] eta: 0:04:43 lr: 0.000108 grad: 0.1521 (0.1666) loss: 0.7142 (0.7147) time: 0.1547 data: 0.0592 max mem: 9377 +Train: [27] [4600/6250] eta: 0:04:27 lr: 0.000108 grad: 0.1505 (0.1662) loss: 0.7194 (0.7148) time: 0.1451 data: 0.0490 max mem: 9377 +Train: [27] [4700/6250] eta: 0:04:11 lr: 0.000108 grad: 0.1486 (0.1659) loss: 0.7149 (0.7149) time: 0.1694 data: 0.0943 max mem: 9377 +Train: [27] [4800/6250] eta: 0:03:54 lr: 0.000108 grad: 0.1480 (0.1656) loss: 0.7097 (0.7149) time: 0.1648 data: 0.0821 max mem: 9377 +Train: [27] [4900/6250] eta: 0:03:38 lr: 0.000108 grad: 0.1448 (0.1653) loss: 0.7197 (0.7151) time: 0.1688 data: 0.0779 max mem: 9377 +Train: [27] [5000/6250] eta: 0:03:21 lr: 0.000108 grad: 0.1432 (0.1650) loss: 0.7291 (0.7152) time: 0.1576 data: 0.0800 max mem: 9377 +Train: [27] [5100/6250] eta: 0:03:05 lr: 0.000108 grad: 0.1405 (0.1646) loss: 0.7256 (0.7154) time: 0.1523 data: 0.0678 max mem: 9377 +Train: [27] [5200/6250] eta: 0:02:49 lr: 0.000108 grad: 0.1505 (0.1643) loss: 0.7260 (0.7155) time: 0.1281 data: 0.0332 max mem: 9377 +Train: [27] [5300/6250] eta: 0:02:32 lr: 0.000108 grad: 0.1490 (0.1641) loss: 0.7075 (0.7155) time: 0.1290 data: 0.0435 max mem: 9377 +Train: [27] [5400/6250] eta: 0:02:16 lr: 0.000108 grad: 0.1437 (0.1638) loss: 0.7177 (0.7156) time: 0.1617 data: 0.0777 max mem: 9377 +Train: [27] [5500/6250] eta: 0:02:00 lr: 0.000108 grad: 0.1456 (0.1636) loss: 0.7267 (0.7156) time: 0.1624 data: 0.0757 max mem: 9377 +Train: [27] [5600/6250] eta: 0:01:44 lr: 0.000108 grad: 0.1452 (0.1633) loss: 0.7176 (0.7158) time: 0.1485 data: 0.0577 max mem: 9377 +Train: [27] [5700/6250] eta: 0:01:28 lr: 0.000108 grad: 0.1427 (0.1630) loss: 0.7093 (0.7157) time: 0.1425 data: 0.0572 max mem: 9377 +Train: [27] [5800/6250] eta: 0:01:12 lr: 0.000108 grad: 0.1455 (0.1628) loss: 0.7112 (0.7157) time: 0.1297 data: 0.0441 max mem: 9377 +Train: [27] [5900/6250] eta: 0:00:55 lr: 0.000108 grad: 0.1576 (0.1626) loss: 0.7038 (0.7157) time: 0.1402 data: 0.0565 max mem: 9377 +Train: [27] [6000/6250] eta: 0:00:39 lr: 0.000108 grad: 0.1484 (0.1624) loss: 0.6986 (0.7156) time: 0.1364 data: 0.0392 max mem: 9377 +Train: [27] [6100/6250] eta: 0:00:23 lr: 0.000108 grad: 0.1430 (0.1622) loss: 0.7156 (0.7156) time: 0.1541 data: 0.0638 max mem: 9377 +Train: [27] [6200/6250] eta: 0:00:07 lr: 0.000108 grad: 0.1471 (0.1621) loss: 0.7218 (0.7156) time: 0.1426 data: 0.0497 max mem: 9377 +Train: [27] [6249/6250] eta: 0:00:00 lr: 0.000108 grad: 0.1443 (0.1620) loss: 0.7291 (0.7157) time: 0.1599 data: 0.0769 max mem: 9377 +Train: [27] Total time: 0:16:45 (0.1608 s / it) +Averaged stats: lr: 0.000108 grad: 0.1443 (0.1620) loss: 0.7291 (0.7157) +Eval (hcp-train-subset): [27] [ 0/62] eta: 0:05:38 loss: 0.8726 (0.8726) time: 5.4676 data: 5.4369 max mem: 9377 +Eval (hcp-train-subset): [27] [61/62] eta: 0:00:00 loss: 0.8757 (0.8769) time: 0.1478 data: 0.1212 max mem: 9377 +Eval (hcp-train-subset): [27] Total time: 0:00:14 (0.2271 s / it) +Averaged stats (hcp-train-subset): loss: 0.8757 (0.8769) +Eval (hcp-val): [27] [ 0/62] eta: 0:03:31 loss: 0.8819 (0.8819) time: 3.4175 data: 3.3387 max mem: 9377 +Eval (hcp-val): [27] [61/62] eta: 0:00:00 loss: 0.8732 (0.8754) time: 0.1119 data: 0.0867 max mem: 9377 +Eval (hcp-val): [27] Total time: 0:00:15 (0.2456 s / it) +Averaged stats (hcp-val): loss: 0.8732 (0.8754) +Eval (nsd-val): [27] [ 0/62] eta: 0:03:23 loss: 0.8496 (0.8496) time: 3.2809 data: 3.1504 max mem: 9377 +Eval (nsd-val): [27] [61/62] eta: 0:00:00 loss: 0.8602 (0.8604) time: 0.1442 data: 0.1170 max mem: 9377 +Eval (nsd-val): [27] Total time: 0:00:14 (0.2362 s / it) +Averaged stats (nsd-val): loss: 0.8602 (0.8604) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [28] [ 0/6250] eta: 7:42:24 lr: 0.000108 grad: 0.1212 (0.1212) loss: 0.8468 (0.8468) time: 4.4391 data: 4.2167 max mem: 9377 +Train: [28] [ 100/6250] eta: 0:23:29 lr: 0.000108 grad: 0.2169 (0.2467) loss: 0.7635 (0.7617) time: 0.2059 data: 0.1017 max mem: 9377 +Train: [28] [ 200/6250] eta: 0:20:36 lr: 0.000108 grad: 0.2499 (0.2557) loss: 0.7397 (0.7542) time: 0.1769 data: 0.0859 max mem: 9377 +Train: [28] [ 300/6250] eta: 0:18:58 lr: 0.000108 grad: 0.2402 (0.2514) loss: 0.7306 (0.7453) time: 0.1312 data: 0.0475 max mem: 9377 +Train: [28] [ 400/6250] eta: 0:18:02 lr: 0.000108 grad: 0.1998 (0.2452) loss: 0.7215 (0.7394) time: 0.1440 data: 0.0594 max mem: 9377 +Train: [28] [ 500/6250] eta: 0:17:28 lr: 0.000108 grad: 0.2080 (0.2385) loss: 0.7219 (0.7353) time: 0.1753 data: 0.0884 max mem: 9377 +Train: [28] [ 600/6250] eta: 0:16:58 lr: 0.000108 grad: 0.2003 (0.2344) loss: 0.7045 (0.7307) time: 0.1756 data: 0.0745 max mem: 9377 +Train: [28] [ 700/6250] eta: 0:16:32 lr: 0.000108 grad: 0.1812 (0.2317) loss: 0.7093 (0.7282) time: 0.1505 data: 0.0428 max mem: 9377 +Train: [28] [ 800/6250] eta: 0:16:00 lr: 0.000108 grad: 0.1630 (0.2248) loss: 0.7108 (0.7266) time: 0.1564 data: 0.0613 max mem: 9377 +Train: [28] [ 900/6250] eta: 0:15:37 lr: 0.000108 grad: 0.1585 (0.2174) loss: 0.7117 (0.7242) time: 0.1413 data: 0.0324 max mem: 9377 +Train: [28] [1000/6250] eta: 0:15:08 lr: 0.000108 grad: 0.1482 (0.2107) loss: 0.7136 (0.7229) time: 0.1665 data: 0.0807 max mem: 9377 +Train: [28] [1100/6250] eta: 0:14:40 lr: 0.000108 grad: 0.1388 (0.2049) loss: 0.7238 (0.7222) time: 0.1413 data: 0.0495 max mem: 9377 +Train: [28] [1200/6250] eta: 0:14:24 lr: 0.000108 grad: 0.1442 (0.2009) loss: 0.7166 (0.7214) time: 0.1673 data: 0.0767 max mem: 9377 +Train: [28] [1300/6250] eta: 0:14:02 lr: 0.000107 grad: 0.1475 (0.1970) loss: 0.7110 (0.7209) time: 0.1570 data: 0.0709 max mem: 9377 +Train: [28] [1400/6250] eta: 0:13:40 lr: 0.000107 grad: 0.1507 (0.1936) loss: 0.7066 (0.7203) time: 0.1492 data: 0.0628 max mem: 9377 +Train: [28] [1500/6250] eta: 0:13:19 lr: 0.000107 grad: 0.1483 (0.1907) loss: 0.7139 (0.7198) time: 0.1751 data: 0.0888 max mem: 9377 +Train: [28] [1600/6250] eta: 0:13:00 lr: 0.000107 grad: 0.1436 (0.1880) loss: 0.7162 (0.7192) time: 0.1523 data: 0.0594 max mem: 9377 +Train: [28] [1700/6250] eta: 0:12:38 lr: 0.000107 grad: 0.1467 (0.1857) loss: 0.7122 (0.7186) time: 0.1497 data: 0.0563 max mem: 9377 +Train: [28] [1800/6250] eta: 0:12:17 lr: 0.000107 grad: 0.1466 (0.1837) loss: 0.7070 (0.7179) time: 0.1321 data: 0.0418 max mem: 9377 +Train: [28] [1900/6250] eta: 0:11:55 lr: 0.000107 grad: 0.1521 (0.1822) loss: 0.7122 (0.7174) time: 0.1439 data: 0.0407 max mem: 9377 +Train: [28] [2000/6250] eta: 0:11:38 lr: 0.000107 grad: 0.1488 (0.1806) loss: 0.7067 (0.7171) time: 0.1914 data: 0.1130 max mem: 9377 +Train: [28] [2100/6250] eta: 0:11:20 lr: 0.000107 grad: 0.1463 (0.1791) loss: 0.7055 (0.7171) time: 0.1542 data: 0.0647 max mem: 9377 +Train: [28] [2200/6250] eta: 0:11:02 lr: 0.000107 grad: 0.1432 (0.1778) loss: 0.7182 (0.7171) time: 0.1700 data: 0.0721 max mem: 9377 +Train: [28] [2300/6250] eta: 0:10:43 lr: 0.000107 grad: 0.1455 (0.1766) loss: 0.7126 (0.7170) time: 0.1303 data: 0.0377 max mem: 9377 +Train: [28] [2400/6250] eta: 0:10:25 lr: 0.000107 grad: 0.1449 (0.1754) loss: 0.7277 (0.7173) time: 0.1429 data: 0.0598 max mem: 9377 +Train: [28] [2500/6250] eta: 0:10:09 lr: 0.000107 grad: 0.1463 (0.1743) loss: 0.7200 (0.7174) time: 0.1656 data: 0.0783 max mem: 9377 +Train: [28] [2600/6250] eta: 0:09:50 lr: 0.000107 grad: 0.1449 (0.1732) loss: 0.7202 (0.7175) time: 0.1440 data: 0.0515 max mem: 9377 +Train: [28] [2700/6250] eta: 0:09:32 lr: 0.000107 grad: 0.1461 (0.1721) loss: 0.7233 (0.7179) time: 0.1253 data: 0.0339 max mem: 9377 +Train: [28] [2800/6250] eta: 0:09:15 lr: 0.000107 grad: 0.1410 (0.1711) loss: 0.7252 (0.7181) time: 0.1838 data: 0.1033 max mem: 9377 +Train: [28] [2900/6250] eta: 0:08:58 lr: 0.000107 grad: 0.1493 (0.1704) loss: 0.7364 (0.7181) time: 0.1602 data: 0.0833 max mem: 9377 +Train: [28] [3000/6250] eta: 0:08:41 lr: 0.000107 grad: 0.1377 (0.1697) loss: 0.7168 (0.7181) time: 0.1400 data: 0.0535 max mem: 9377 +Train: [28] [3100/6250] eta: 0:08:25 lr: 0.000107 grad: 0.1475 (0.1690) loss: 0.7215 (0.7180) time: 0.1782 data: 0.0905 max mem: 9377 +Train: [28] [3200/6250] eta: 0:08:09 lr: 0.000107 grad: 0.1462 (0.1683) loss: 0.7210 (0.7181) time: 0.1605 data: 0.0685 max mem: 9377 +Train: [28] [3300/6250] eta: 0:07:53 lr: 0.000107 grad: 0.1444 (0.1676) loss: 0.7127 (0.7182) time: 0.1729 data: 0.0756 max mem: 9377 +Train: [28] [3400/6250] eta: 0:07:36 lr: 0.000107 grad: 0.1557 (0.1671) loss: 0.6943 (0.7183) time: 0.1606 data: 0.0659 max mem: 9377 +Train: [28] [3500/6250] eta: 0:07:21 lr: 0.000107 grad: 0.1455 (0.1666) loss: 0.7253 (0.7185) time: 0.1970 data: 0.1106 max mem: 9377 +Train: [28] [3600/6250] eta: 0:07:06 lr: 0.000107 grad: 0.1469 (0.1661) loss: 0.7145 (0.7185) time: 0.1892 data: 0.0881 max mem: 9377 +Train: [28] [3700/6250] eta: 0:06:50 lr: 0.000107 grad: 0.1491 (0.1658) loss: 0.7152 (0.7184) time: 0.1786 data: 0.0939 max mem: 9377 +Train: [28] [3800/6250] eta: 0:06:34 lr: 0.000107 grad: 0.1456 (0.1653) loss: 0.7157 (0.7185) time: 0.1146 data: 0.0261 max mem: 9377 +Train: [28] [3900/6250] eta: 0:06:17 lr: 0.000107 grad: 0.1526 (0.1649) loss: 0.7285 (0.7186) time: 0.1468 data: 0.0647 max mem: 9377 +Train: [28] [4000/6250] eta: 0:06:01 lr: 0.000107 grad: 0.1540 (0.1645) loss: 0.7038 (0.7185) time: 0.1548 data: 0.0663 max mem: 9377 +Train: [28] [4100/6250] eta: 0:05:44 lr: 0.000107 grad: 0.1545 (0.1642) loss: 0.7152 (0.7184) time: 0.1642 data: 0.0708 max mem: 9377 +Train: [28] [4200/6250] eta: 0:05:28 lr: 0.000107 grad: 0.1559 (0.1640) loss: 0.7171 (0.7182) time: 0.1486 data: 0.0601 max mem: 9377 +Train: [28] [4300/6250] eta: 0:05:12 lr: 0.000107 grad: 0.1577 (0.1640) loss: 0.7072 (0.7182) time: 0.1488 data: 0.0642 max mem: 9377 +Train: [28] [4400/6250] eta: 0:04:55 lr: 0.000107 grad: 0.1528 (0.1638) loss: 0.7173 (0.7181) time: 0.1652 data: 0.0761 max mem: 9377 +Train: [28] [4500/6250] eta: 0:04:39 lr: 0.000107 grad: 0.1504 (0.1634) loss: 0.7181 (0.7181) time: 0.1447 data: 0.0499 max mem: 9377 +Train: [28] [4600/6250] eta: 0:04:23 lr: 0.000107 grad: 0.1471 (0.1633) loss: 0.7226 (0.7180) time: 0.1359 data: 0.0452 max mem: 9377 +Train: [28] [4700/6250] eta: 0:04:07 lr: 0.000107 grad: 0.1551 (0.1631) loss: 0.7053 (0.7179) time: 0.1620 data: 0.0707 max mem: 9377 +Train: [28] [4800/6250] eta: 0:03:51 lr: 0.000107 grad: 0.1564 (0.1630) loss: 0.7056 (0.7179) time: 0.1540 data: 0.0610 max mem: 9377 +Train: [28] [4900/6250] eta: 0:03:35 lr: 0.000107 grad: 0.1500 (0.1628) loss: 0.7117 (0.7178) time: 0.1391 data: 0.0373 max mem: 9377 +Train: [28] [5000/6250] eta: 0:03:19 lr: 0.000107 grad: 0.1538 (0.1627) loss: 0.7119 (0.7177) time: 0.1656 data: 0.0788 max mem: 9377 +Train: [28] [5100/6250] eta: 0:03:03 lr: 0.000107 grad: 0.1473 (0.1625) loss: 0.7047 (0.7175) time: 0.1624 data: 0.0616 max mem: 9377 +Train: [28] [5200/6250] eta: 0:02:47 lr: 0.000107 grad: 0.1535 (0.1623) loss: 0.7088 (0.7174) time: 0.1443 data: 0.0546 max mem: 9377 +Train: [28] [5300/6250] eta: 0:02:31 lr: 0.000107 grad: 0.1556 (0.1622) loss: 0.7035 (0.7173) time: 0.1611 data: 0.0777 max mem: 9377 +Train: [28] [5400/6250] eta: 0:02:15 lr: 0.000107 grad: 0.1486 (0.1620) loss: 0.7137 (0.7172) time: 0.1646 data: 0.0783 max mem: 9377 +Train: [28] [5500/6250] eta: 0:01:59 lr: 0.000107 grad: 0.1519 (0.1619) loss: 0.7116 (0.7171) time: 0.1376 data: 0.0450 max mem: 9377 +Train: [28] [5600/6250] eta: 0:01:43 lr: 0.000106 grad: 0.1516 (0.1618) loss: 0.7087 (0.7170) time: 0.1616 data: 0.0711 max mem: 9377 +Train: [28] [5700/6250] eta: 0:01:27 lr: 0.000106 grad: 0.1550 (0.1616) loss: 0.6900 (0.7169) time: 0.1580 data: 0.0723 max mem: 9377 +Train: [28] [5800/6250] eta: 0:01:11 lr: 0.000106 grad: 0.1495 (0.1615) loss: 0.7054 (0.7167) time: 0.1610 data: 0.0698 max mem: 9377 +Train: [28] [5900/6250] eta: 0:00:55 lr: 0.000106 grad: 0.1530 (0.1614) loss: 0.6939 (0.7166) time: 0.1805 data: 0.0979 max mem: 9377 +Train: [28] [6000/6250] eta: 0:00:39 lr: 0.000106 grad: 0.1560 (0.1613) loss: 0.7097 (0.7164) time: 0.1569 data: 0.0662 max mem: 9377 +Train: [28] [6100/6250] eta: 0:00:23 lr: 0.000106 grad: 0.1525 (0.1612) loss: 0.6939 (0.7162) time: 0.1568 data: 0.0661 max mem: 9377 +Train: [28] [6200/6250] eta: 0:00:07 lr: 0.000106 grad: 0.1528 (0.1611) loss: 0.7195 (0.7161) time: 0.1472 data: 0.0606 max mem: 9377 +Train: [28] [6249/6250] eta: 0:00:00 lr: 0.000106 grad: 0.1513 (0.1611) loss: 0.7141 (0.7160) time: 0.1676 data: 0.0798 max mem: 9377 +Train: [28] Total time: 0:16:39 (0.1600 s / it) +Averaged stats: lr: 0.000106 grad: 0.1513 (0.1611) loss: 0.7141 (0.7160) +Eval (hcp-train-subset): [28] [ 0/62] eta: 0:05:24 loss: 0.8697 (0.8697) time: 5.2315 data: 5.2016 max mem: 9377 +Eval (hcp-train-subset): [28] [61/62] eta: 0:00:00 loss: 0.8771 (0.8779) time: 0.1513 data: 0.1245 max mem: 9377 +Eval (hcp-train-subset): [28] Total time: 0:00:14 (0.2335 s / it) +Averaged stats (hcp-train-subset): loss: 0.8771 (0.8779) +Eval (hcp-val): [28] [ 0/62] eta: 0:03:43 loss: 0.8758 (0.8758) time: 3.6092 data: 3.5449 max mem: 9377 +Eval (hcp-val): [28] [61/62] eta: 0:00:00 loss: 0.8745 (0.8773) time: 0.1316 data: 0.1065 max mem: 9377 +Eval (hcp-val): [28] Total time: 0:00:14 (0.2338 s / it) +Averaged stats (hcp-val): loss: 0.8745 (0.8773) +Eval (nsd-val): [28] [ 0/62] eta: 0:03:36 loss: 0.8506 (0.8506) time: 3.4995 data: 3.4297 max mem: 9377 +Eval (nsd-val): [28] [61/62] eta: 0:00:00 loss: 0.8563 (0.8563) time: 0.1525 data: 0.1255 max mem: 9377 +Eval (nsd-val): [28] Total time: 0:00:14 (0.2338 s / it) +Averaged stats (nsd-val): loss: 0.8563 (0.8563) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [29] [ 0/6250] eta: 10:40:45 lr: 0.000106 grad: 0.1602 (0.1602) loss: 0.7834 (0.7834) time: 6.1513 data: 6.0308 max mem: 9377 +Train: [29] [ 100/6250] eta: 0:22:45 lr: 0.000106 grad: 0.3090 (0.2915) loss: 0.7162 (0.7624) time: 0.1511 data: 0.0411 max mem: 9377 +Train: [29] [ 200/6250] eta: 0:20:26 lr: 0.000106 grad: 0.2634 (0.3077) loss: 0.7130 (0.7408) time: 0.1930 data: 0.1062 max mem: 9377 +Train: [29] [ 300/6250] eta: 0:19:29 lr: 0.000106 grad: 0.2292 (0.2845) loss: 0.7174 (0.7345) time: 0.1843 data: 0.0901 max mem: 9377 +Train: [29] [ 400/6250] eta: 0:18:22 lr: 0.000106 grad: 0.2381 (0.2715) loss: 0.6820 (0.7269) time: 0.1750 data: 0.0804 max mem: 9377 +Train: [29] [ 500/6250] eta: 0:17:40 lr: 0.000106 grad: 0.2146 (0.2597) loss: 0.7062 (0.7240) time: 0.1657 data: 0.0778 max mem: 9377 +Train: [29] [ 600/6250] eta: 0:17:01 lr: 0.000106 grad: 0.1680 (0.2480) loss: 0.7034 (0.7217) time: 0.1621 data: 0.0664 max mem: 9377 +Train: [29] [ 700/6250] eta: 0:16:25 lr: 0.000106 grad: 0.1675 (0.2372) loss: 0.7013 (0.7201) time: 0.1609 data: 0.0618 max mem: 9377 +Train: [29] [ 800/6250] eta: 0:15:54 lr: 0.000106 grad: 0.1617 (0.2281) loss: 0.7188 (0.7194) time: 0.1465 data: 0.0494 max mem: 9377 +Train: [29] [ 900/6250] eta: 0:15:22 lr: 0.000106 grad: 0.1461 (0.2196) loss: 0.7273 (0.7192) time: 0.1355 data: 0.0307 max mem: 9377 +Train: [29] [1000/6250] eta: 0:14:52 lr: 0.000106 grad: 0.1469 (0.2124) loss: 0.7253 (0.7193) time: 0.1474 data: 0.0558 max mem: 9377 +Train: [29] [1100/6250] eta: 0:14:23 lr: 0.000106 grad: 0.1418 (0.2066) loss: 0.7290 (0.7190) time: 0.1290 data: 0.0439 max mem: 9377 +Train: [29] [1200/6250] eta: 0:13:59 lr: 0.000106 grad: 0.1402 (0.2014) loss: 0.7362 (0.7196) time: 0.1541 data: 0.0789 max mem: 9377 +Train: [29] [1300/6250] eta: 0:13:42 lr: 0.000106 grad: 0.1438 (0.1973) loss: 0.7119 (0.7197) time: 0.1577 data: 0.0713 max mem: 9377 +Train: [29] [1400/6250] eta: 0:13:24 lr: 0.000106 grad: 0.1524 (0.1940) loss: 0.7049 (0.7193) time: 0.1537 data: 0.0635 max mem: 9377 +Train: [29] [1500/6250] eta: 0:13:04 lr: 0.000106 grad: 0.1483 (0.1910) loss: 0.7003 (0.7185) time: 0.1549 data: 0.0591 max mem: 9377 +Train: [29] [1600/6250] eta: 0:12:44 lr: 0.000106 grad: 0.1423 (0.1884) loss: 0.7153 (0.7182) time: 0.1391 data: 0.0476 max mem: 9377 +Train: [29] [1700/6250] eta: 0:12:24 lr: 0.000106 grad: 0.1534 (0.1861) loss: 0.7022 (0.7178) time: 0.1407 data: 0.0471 max mem: 9377 +Train: [29] [1800/6250] eta: 0:12:04 lr: 0.000106 grad: 0.1416 (0.1840) loss: 0.7143 (0.7175) time: 0.1317 data: 0.0407 max mem: 9377 +Train: [29] [1900/6250] eta: 0:11:44 lr: 0.000106 grad: 0.1508 (0.1821) loss: 0.7180 (0.7172) time: 0.1297 data: 0.0275 max mem: 9377 +Train: [29] [2000/6250] eta: 0:11:25 lr: 0.000106 grad: 0.1466 (0.1804) loss: 0.7207 (0.7172) time: 0.1526 data: 0.0682 max mem: 9377 +Train: [29] [2100/6250] eta: 0:11:10 lr: 0.000106 grad: 0.1542 (0.1788) loss: 0.7085 (0.7172) time: 0.1581 data: 0.0723 max mem: 9377 +Train: [29] [2200/6250] eta: 0:10:54 lr: 0.000106 grad: 0.1498 (0.1775) loss: 0.7130 (0.7171) time: 0.1664 data: 0.0766 max mem: 9377 +Train: [29] [2300/6250] eta: 0:10:38 lr: 0.000106 grad: 0.1583 (0.1764) loss: 0.7100 (0.7170) time: 0.1722 data: 0.0840 max mem: 9377 +Train: [29] [2400/6250] eta: 0:10:21 lr: 0.000106 grad: 0.1446 (0.1754) loss: 0.7195 (0.7170) time: 0.1663 data: 0.0790 max mem: 9377 +Train: [29] [2500/6250] eta: 0:10:06 lr: 0.000106 grad: 0.1477 (0.1744) loss: 0.7057 (0.7170) time: 0.1688 data: 0.0715 max mem: 9377 +Train: [29] [2600/6250] eta: 0:09:48 lr: 0.000106 grad: 0.1489 (0.1735) loss: 0.7095 (0.7171) time: 0.1394 data: 0.0390 max mem: 9377 +Train: [29] [2700/6250] eta: 0:09:32 lr: 0.000106 grad: 0.1422 (0.1727) loss: 0.7202 (0.7171) time: 0.1785 data: 0.0786 max mem: 9377 +Train: [29] [2800/6250] eta: 0:09:15 lr: 0.000106 grad: 0.1493 (0.1719) loss: 0.7207 (0.7172) time: 0.1643 data: 0.0762 max mem: 9377 +Train: [29] [2900/6250] eta: 0:08:58 lr: 0.000106 grad: 0.1642 (0.1713) loss: 0.7153 (0.7171) time: 0.1634 data: 0.0706 max mem: 9377 +Train: [29] [3000/6250] eta: 0:08:41 lr: 0.000106 grad: 0.1467 (0.1706) loss: 0.7233 (0.7169) time: 0.1608 data: 0.0669 max mem: 9377 +Train: [29] [3100/6250] eta: 0:08:25 lr: 0.000106 grad: 0.1466 (0.1701) loss: 0.7072 (0.7168) time: 0.1425 data: 0.0496 max mem: 9377 +Train: [29] [3200/6250] eta: 0:08:09 lr: 0.000106 grad: 0.1431 (0.1696) loss: 0.7226 (0.7167) time: 0.1794 data: 0.0942 max mem: 9377 +Train: [29] [3300/6250] eta: 0:07:52 lr: 0.000106 grad: 0.1513 (0.1691) loss: 0.7146 (0.7167) time: 0.1651 data: 0.0826 max mem: 9377 +Train: [29] [3400/6250] eta: 0:07:36 lr: 0.000106 grad: 0.1433 (0.1686) loss: 0.7155 (0.7167) time: 0.1683 data: 0.0809 max mem: 9377 +Train: [29] [3500/6250] eta: 0:07:20 lr: 0.000105 grad: 0.1454 (0.1680) loss: 0.7062 (0.7165) time: 0.1608 data: 0.0810 max mem: 9377 +Train: [29] [3600/6250] eta: 0:07:03 lr: 0.000105 grad: 0.1447 (0.1676) loss: 0.7071 (0.7164) time: 0.1618 data: 0.0690 max mem: 9377 +Train: [29] [3700/6250] eta: 0:06:47 lr: 0.000105 grad: 0.1423 (0.1671) loss: 0.7189 (0.7164) time: 0.1604 data: 0.0757 max mem: 9377 +Train: [29] [3800/6250] eta: 0:06:31 lr: 0.000105 grad: 0.1432 (0.1666) loss: 0.7208 (0.7162) time: 0.1472 data: 0.0565 max mem: 9377 +Train: [29] [3900/6250] eta: 0:06:16 lr: 0.000105 grad: 0.1492 (0.1662) loss: 0.7132 (0.7162) time: 0.2099 data: 0.1103 max mem: 9377 +Train: [29] [4000/6250] eta: 0:05:59 lr: 0.000105 grad: 0.1568 (0.1659) loss: 0.6947 (0.7161) time: 0.1524 data: 0.0702 max mem: 9377 +Train: [29] [4100/6250] eta: 0:05:42 lr: 0.000105 grad: 0.1471 (0.1655) loss: 0.7187 (0.7160) time: 0.1341 data: 0.0475 max mem: 9377 +Train: [29] [4200/6250] eta: 0:05:26 lr: 0.000105 grad: 0.1418 (0.1652) loss: 0.7187 (0.7161) time: 0.1520 data: 0.0609 max mem: 9377 +Train: [29] [4300/6250] eta: 0:05:10 lr: 0.000105 grad: 0.1453 (0.1648) loss: 0.7336 (0.7161) time: 0.1534 data: 0.0704 max mem: 9377 +Train: [29] [4400/6250] eta: 0:04:54 lr: 0.000105 grad: 0.1510 (0.1644) loss: 0.7137 (0.7162) time: 0.1469 data: 0.0564 max mem: 9377 +Train: [29] [4500/6250] eta: 0:04:38 lr: 0.000105 grad: 0.1473 (0.1641) loss: 0.7168 (0.7163) time: 0.1293 data: 0.0326 max mem: 9377 +Train: [29] [4600/6250] eta: 0:04:22 lr: 0.000105 grad: 0.1453 (0.1638) loss: 0.7116 (0.7163) time: 0.1471 data: 0.0606 max mem: 9377 +Train: [29] [4700/6250] eta: 0:04:05 lr: 0.000105 grad: 0.1550 (0.1635) loss: 0.7168 (0.7163) time: 0.1435 data: 0.0451 max mem: 9377 +Train: [29] [4800/6250] eta: 0:03:49 lr: 0.000105 grad: 0.1452 (0.1633) loss: 0.7025 (0.7161) time: 0.1444 data: 0.0507 max mem: 9377 +Train: [29] [4900/6250] eta: 0:03:34 lr: 0.000105 grad: 0.1411 (0.1631) loss: 0.7187 (0.7160) time: 0.1604 data: 0.0739 max mem: 9377 +Train: [29] [5000/6250] eta: 0:03:18 lr: 0.000105 grad: 0.1473 (0.1628) loss: 0.7074 (0.7159) time: 0.1611 data: 0.0744 max mem: 9377 +Train: [29] [5100/6250] eta: 0:03:02 lr: 0.000105 grad: 0.1516 (0.1626) loss: 0.7158 (0.7158) time: 0.1639 data: 0.0669 max mem: 9377 +Train: [29] [5200/6250] eta: 0:02:46 lr: 0.000105 grad: 0.1493 (0.1625) loss: 0.7067 (0.7157) time: 0.1454 data: 0.0562 max mem: 9377 +Train: [29] [5300/6250] eta: 0:02:30 lr: 0.000105 grad: 0.1591 (0.1623) loss: 0.7033 (0.7155) time: 0.1483 data: 0.0639 max mem: 9377 +Train: [29] [5400/6250] eta: 0:02:14 lr: 0.000105 grad: 0.1574 (0.1622) loss: 0.7099 (0.7153) time: 0.1562 data: 0.0656 max mem: 9377 +Train: [29] [5500/6250] eta: 0:01:58 lr: 0.000105 grad: 0.1494 (0.1620) loss: 0.7081 (0.7152) time: 0.1602 data: 0.0720 max mem: 9377 +Train: [29] [5600/6250] eta: 0:01:42 lr: 0.000105 grad: 0.1470 (0.1618) loss: 0.7106 (0.7151) time: 0.1514 data: 0.0637 max mem: 9377 +Train: [29] [5700/6250] eta: 0:01:27 lr: 0.000105 grad: 0.1499 (0.1617) loss: 0.7001 (0.7150) time: 0.1573 data: 0.0784 max mem: 9377 +Train: [29] [5800/6250] eta: 0:01:11 lr: 0.000105 grad: 0.1572 (0.1616) loss: 0.6927 (0.7149) time: 0.1493 data: 0.0611 max mem: 9377 +Train: [29] [5900/6250] eta: 0:00:55 lr: 0.000105 grad: 0.1504 (0.1615) loss: 0.7043 (0.7148) time: 0.1502 data: 0.0653 max mem: 9377 +Train: [29] [6000/6250] eta: 0:00:39 lr: 0.000105 grad: 0.1494 (0.1612) loss: 0.7105 (0.7147) time: 0.1553 data: 0.0740 max mem: 9377 +Train: [29] [6100/6250] eta: 0:00:23 lr: 0.000105 grad: 0.1509 (0.1611) loss: 0.7166 (0.7147) time: 0.1570 data: 0.0704 max mem: 9377 +Train: [29] [6200/6250] eta: 0:00:07 lr: 0.000105 grad: 0.1520 (0.1610) loss: 0.7126 (0.7146) time: 0.1618 data: 0.0736 max mem: 9377 +Train: [29] [6249/6250] eta: 0:00:00 lr: 0.000105 grad: 0.1492 (0.1610) loss: 0.7149 (0.7146) time: 0.1680 data: 0.0810 max mem: 9377 +Train: [29] Total time: 0:16:35 (0.1593 s / it) +Averaged stats: lr: 0.000105 grad: 0.1492 (0.1610) loss: 0.7149 (0.7146) +Eval (hcp-train-subset): [29] [ 0/62] eta: 0:05:22 loss: 0.8688 (0.8688) time: 5.2055 data: 5.1432 max mem: 9377 +Eval (hcp-train-subset): [29] [61/62] eta: 0:00:00 loss: 0.8798 (0.8801) time: 0.1360 data: 0.1112 max mem: 9377 +Eval (hcp-train-subset): [29] Total time: 0:00:14 (0.2291 s / it) +Averaged stats (hcp-train-subset): loss: 0.8798 (0.8801) +Making plots (hcp-train-subset): example=1 +Eval (hcp-val): [29] [ 0/62] eta: 0:05:20 loss: 0.8783 (0.8783) time: 5.1767 data: 5.1467 max mem: 9377 +Eval (hcp-val): [29] [61/62] eta: 0:00:00 loss: 0.8769 (0.8799) time: 0.0991 data: 0.0740 max mem: 9377 +Eval (hcp-val): [29] Total time: 0:00:14 (0.2285 s / it) +Averaged stats (hcp-val): loss: 0.8769 (0.8799) +Making plots (hcp-val): example=28 +Eval (nsd-val): [29] [ 0/62] eta: 0:04:17 loss: 0.8421 (0.8421) time: 4.1496 data: 4.0902 max mem: 9377 +Eval (nsd-val): [29] [61/62] eta: 0:00:00 loss: 0.8542 (0.8556) time: 0.1457 data: 0.1204 max mem: 9377 +Eval (nsd-val): [29] Total time: 0:00:13 (0.2251 s / it) +Averaged stats (nsd-val): loss: 0.8542 (0.8556) +Making plots (nsd-val): example=16 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-00029.pth +Train: [30] [ 0/6250] eta: 7:08:40 lr: 0.000105 grad: 0.1456 (0.1456) loss: 0.8248 (0.8248) time: 4.1153 data: 3.8434 max mem: 9377 +Train: [30] [ 100/6250] eta: 0:22:10 lr: 0.000105 grad: 0.2414 (0.2987) loss: 0.7022 (0.7352) time: 0.1481 data: 0.0571 max mem: 9377 +Train: [30] [ 200/6250] eta: 0:19:21 lr: 0.000105 grad: 0.2501 (0.2793) loss: 0.6851 (0.7245) time: 0.1784 data: 0.0964 max mem: 9377 +Train: [30] [ 300/6250] eta: 0:17:51 lr: 0.000105 grad: 0.2059 (0.2620) loss: 0.7303 (0.7204) time: 0.1539 data: 0.0607 max mem: 9377 +Train: [30] [ 400/6250] eta: 0:17:06 lr: 0.000105 grad: 0.2170 (0.2506) loss: 0.7152 (0.7178) time: 0.1662 data: 0.0861 max mem: 9377 +Train: [30] [ 500/6250] eta: 0:16:19 lr: 0.000105 grad: 0.2579 (0.2481) loss: 0.7037 (0.7169) time: 0.1843 data: 0.0924 max mem: 9377 +Train: [30] [ 600/6250] eta: 0:15:48 lr: 0.000105 grad: 0.1897 (0.2439) loss: 0.7148 (0.7166) time: 0.1572 data: 0.0684 max mem: 9377 +Train: [30] [ 700/6250] eta: 0:15:23 lr: 0.000105 grad: 0.1797 (0.2358) loss: 0.7105 (0.7157) time: 0.1525 data: 0.0488 max mem: 9377 +Train: [30] [ 800/6250] eta: 0:14:54 lr: 0.000105 grad: 0.1901 (0.2308) loss: 0.7181 (0.7156) time: 0.1512 data: 0.0514 max mem: 9377 +Train: [30] [ 900/6250] eta: 0:14:28 lr: 0.000105 grad: 0.1661 (0.2252) loss: 0.7070 (0.7151) time: 0.1459 data: 0.0514 max mem: 9377 +Train: [30] [1000/6250] eta: 0:14:03 lr: 0.000105 grad: 0.1571 (0.2193) loss: 0.7163 (0.7152) time: 0.1402 data: 0.0411 max mem: 9377 +Train: [30] [1100/6250] eta: 0:13:39 lr: 0.000105 grad: 0.1596 (0.2138) loss: 0.7097 (0.7150) time: 0.1530 data: 0.0692 max mem: 9377 +Train: [30] [1200/6250] eta: 0:13:26 lr: 0.000105 grad: 0.1536 (0.2091) loss: 0.7176 (0.7153) time: 0.1502 data: 0.0669 max mem: 9377 +Train: [30] [1300/6250] eta: 0:13:10 lr: 0.000105 grad: 0.1434 (0.2046) loss: 0.7162 (0.7157) time: 0.1745 data: 0.0865 max mem: 9377 +Train: [30] [1400/6250] eta: 0:12:56 lr: 0.000104 grad: 0.1510 (0.2007) loss: 0.7160 (0.7158) time: 0.1777 data: 0.0946 max mem: 9377 +Train: [30] [1500/6250] eta: 0:12:35 lr: 0.000104 grad: 0.1430 (0.1973) loss: 0.7118 (0.7159) time: 0.1413 data: 0.0622 max mem: 9377 +Train: [30] [1600/6250] eta: 0:12:13 lr: 0.000104 grad: 0.1436 (0.1941) loss: 0.7053 (0.7157) time: 0.1317 data: 0.0335 max mem: 9377 +Train: [30] [1700/6250] eta: 0:12:01 lr: 0.000104 grad: 0.1499 (0.1916) loss: 0.7163 (0.7156) time: 0.1698 data: 0.0800 max mem: 9377 +Train: [30] [1800/6250] eta: 0:11:47 lr: 0.000104 grad: 0.1395 (0.1891) loss: 0.7164 (0.7153) time: 0.1576 data: 0.0439 max mem: 9377 +Train: [30] [1900/6250] eta: 0:11:30 lr: 0.000104 grad: 0.1446 (0.1872) loss: 0.6935 (0.7151) time: 0.1585 data: 0.0635 max mem: 9377 +Train: [30] [2000/6250] eta: 0:11:18 lr: 0.000104 grad: 0.1474 (0.1852) loss: 0.7049 (0.7148) time: 0.1791 data: 0.0941 max mem: 9377 +Train: [30] [2100/6250] eta: 0:11:05 lr: 0.000104 grad: 0.1499 (0.1836) loss: 0.6972 (0.7144) time: 0.1706 data: 0.0867 max mem: 9377 +Train: [30] [2200/6250] eta: 0:10:54 lr: 0.000104 grad: 0.1504 (0.1822) loss: 0.6849 (0.7138) time: 0.2349 data: 0.1499 max mem: 9377 +Train: [30] [2300/6250] eta: 0:10:37 lr: 0.000104 grad: 0.1591 (0.1811) loss: 0.6994 (0.7132) time: 0.1417 data: 0.0564 max mem: 9377 +Train: [30] [2400/6250] eta: 0:10:22 lr: 0.000104 grad: 0.1550 (0.1800) loss: 0.6947 (0.7129) time: 0.1925 data: 0.1013 max mem: 9377 +Train: [30] [2500/6250] eta: 0:10:07 lr: 0.000104 grad: 0.1534 (0.1788) loss: 0.7071 (0.7129) time: 0.1741 data: 0.0760 max mem: 9377 +Train: [30] [2600/6250] eta: 0:09:51 lr: 0.000104 grad: 0.1509 (0.1779) loss: 0.6946 (0.7127) time: 0.1500 data: 0.0569 max mem: 9377 +Train: [30] [2700/6250] eta: 0:09:35 lr: 0.000104 grad: 0.1566 (0.1769) loss: 0.7044 (0.7126) time: 0.1577 data: 0.0608 max mem: 9377 +Train: [30] [2800/6250] eta: 0:09:19 lr: 0.000104 grad: 0.1526 (0.1760) loss: 0.7069 (0.7126) time: 0.1661 data: 0.0747 max mem: 9377 +Train: [30] [2900/6250] eta: 0:09:03 lr: 0.000104 grad: 0.1466 (0.1752) loss: 0.7111 (0.7122) time: 0.1830 data: 0.0953 max mem: 9377 +Train: [30] [3000/6250] eta: 0:08:45 lr: 0.000104 grad: 0.1536 (0.1745) loss: 0.6993 (0.7120) time: 0.1507 data: 0.0644 max mem: 9377 +Train: [30] [3100/6250] eta: 0:08:29 lr: 0.000104 grad: 0.1527 (0.1737) loss: 0.7031 (0.7119) time: 0.1503 data: 0.0685 max mem: 9377 +Train: [30] [3200/6250] eta: 0:08:13 lr: 0.000104 grad: 0.1482 (0.1730) loss: 0.7045 (0.7117) time: 0.1790 data: 0.1000 max mem: 9377 +Train: [30] [3300/6250] eta: 0:07:57 lr: 0.000104 grad: 0.1521 (0.1723) loss: 0.7057 (0.7117) time: 0.1879 data: 0.0996 max mem: 9377 +Train: [30] [3400/6250] eta: 0:07:40 lr: 0.000104 grad: 0.1584 (0.1719) loss: 0.6974 (0.7115) time: 0.1540 data: 0.0717 max mem: 9377 +Train: [30] [3500/6250] eta: 0:07:24 lr: 0.000104 grad: 0.1463 (0.1714) loss: 0.7064 (0.7115) time: 0.1680 data: 0.0801 max mem: 9377 +Train: [30] [3600/6250] eta: 0:07:08 lr: 0.000104 grad: 0.1518 (0.1708) loss: 0.7094 (0.7116) time: 0.1589 data: 0.0708 max mem: 9377 +Train: [30] [3700/6250] eta: 0:06:51 lr: 0.000104 grad: 0.1481 (0.1704) loss: 0.7100 (0.7115) time: 0.1491 data: 0.0655 max mem: 9377 +Train: [30] [3800/6250] eta: 0:06:35 lr: 0.000104 grad: 0.1483 (0.1700) loss: 0.7142 (0.7114) time: 0.1653 data: 0.0737 max mem: 9377 +Train: [30] [3900/6250] eta: 0:06:18 lr: 0.000104 grad: 0.1492 (0.1696) loss: 0.7136 (0.7114) time: 0.1524 data: 0.0565 max mem: 9377 +Train: [30] [4000/6250] eta: 0:06:02 lr: 0.000104 grad: 0.1477 (0.1692) loss: 0.7122 (0.7112) time: 0.1610 data: 0.0680 max mem: 9377 +Train: [30] [4100/6250] eta: 0:05:46 lr: 0.000104 grad: 0.1484 (0.1689) loss: 0.7009 (0.7110) time: 0.1451 data: 0.0586 max mem: 9377 +Train: [30] [4200/6250] eta: 0:05:30 lr: 0.000104 grad: 0.1556 (0.1686) loss: 0.6935 (0.7106) time: 0.1638 data: 0.0853 max mem: 9377 +Train: [30] [4300/6250] eta: 0:05:13 lr: 0.000104 grad: 0.1578 (0.1683) loss: 0.7099 (0.7103) time: 0.1619 data: 0.0757 max mem: 9377 +Train: [30] [4400/6250] eta: 0:04:57 lr: 0.000104 grad: 0.1554 (0.1680) loss: 0.6937 (0.7101) time: 0.1448 data: 0.0535 max mem: 9377 +Train: [30] [4500/6250] eta: 0:04:41 lr: 0.000104 grad: 0.1424 (0.1677) loss: 0.7202 (0.7100) time: 0.1758 data: 0.0891 max mem: 9377 +Train: [30] [4600/6250] eta: 0:04:24 lr: 0.000104 grad: 0.1474 (0.1674) loss: 0.7127 (0.7098) time: 0.1556 data: 0.0721 max mem: 9377 +Train: [30] [4700/6250] eta: 0:04:08 lr: 0.000104 grad: 0.1444 (0.1670) loss: 0.7057 (0.7098) time: 0.1509 data: 0.0601 max mem: 9377 +Train: [30] [4800/6250] eta: 0:03:52 lr: 0.000104 grad: 0.1546 (0.1667) loss: 0.6959 (0.7098) time: 0.1458 data: 0.0614 max mem: 9377 +Train: [30] [4900/6250] eta: 0:03:36 lr: 0.000104 grad: 0.1502 (0.1665) loss: 0.7005 (0.7097) time: 0.1581 data: 0.0709 max mem: 9377 +Train: [30] [5000/6250] eta: 0:03:20 lr: 0.000104 grad: 0.1512 (0.1663) loss: 0.7080 (0.7095) time: 0.1831 data: 0.0987 max mem: 9377 +Train: [30] [5100/6250] eta: 0:03:03 lr: 0.000104 grad: 0.1612 (0.1660) loss: 0.7114 (0.7094) time: 0.1439 data: 0.0549 max mem: 9377 +Train: [30] [5200/6250] eta: 0:02:47 lr: 0.000104 grad: 0.1653 (0.1659) loss: 0.6996 (0.7093) time: 0.1578 data: 0.0614 max mem: 9377 +Train: [30] [5300/6250] eta: 0:02:31 lr: 0.000104 grad: 0.1485 (0.1657) loss: 0.7093 (0.7092) time: 0.1503 data: 0.0692 max mem: 9377 +Train: [30] [5400/6250] eta: 0:02:15 lr: 0.000103 grad: 0.1521 (0.1655) loss: 0.7002 (0.7091) time: 0.1470 data: 0.0496 max mem: 9377 +Train: [30] [5500/6250] eta: 0:01:59 lr: 0.000103 grad: 0.1493 (0.1653) loss: 0.7105 (0.7090) time: 0.1430 data: 0.0502 max mem: 9377 +Train: [30] [5600/6250] eta: 0:01:43 lr: 0.000103 grad: 0.1585 (0.1652) loss: 0.6904 (0.7088) time: 0.1481 data: 0.0554 max mem: 9377 +Train: [30] [5700/6250] eta: 0:01:27 lr: 0.000103 grad: 0.1515 (0.1650) loss: 0.7085 (0.7088) time: 0.1507 data: 0.0550 max mem: 9377 +Train: [30] [5800/6250] eta: 0:01:11 lr: 0.000103 grad: 0.1566 (0.1648) loss: 0.6959 (0.7087) time: 0.1569 data: 0.0666 max mem: 9377 +Train: [30] [5900/6250] eta: 0:00:55 lr: 0.000103 grad: 0.1459 (0.1646) loss: 0.6946 (0.7087) time: 0.1534 data: 0.0698 max mem: 9377 +Train: [30] [6000/6250] eta: 0:00:39 lr: 0.000103 grad: 0.1509 (0.1645) loss: 0.7202 (0.7085) time: 0.1531 data: 0.0710 max mem: 9377 +Train: [30] [6100/6250] eta: 0:00:23 lr: 0.000103 grad: 0.1549 (0.1644) loss: 0.7022 (0.7085) time: 0.1486 data: 0.0507 max mem: 9377 +Train: [30] [6200/6250] eta: 0:00:07 lr: 0.000103 grad: 0.1543 (0.1642) loss: 0.6968 (0.7084) time: 0.1409 data: 0.0546 max mem: 9377 +Train: [30] [6249/6250] eta: 0:00:00 lr: 0.000103 grad: 0.1515 (0.1641) loss: 0.6966 (0.7084) time: 0.1329 data: 0.0452 max mem: 9377 +Train: [30] Total time: 0:16:42 (0.1604 s / it) +Averaged stats: lr: 0.000103 grad: 0.1515 (0.1641) loss: 0.6966 (0.7084) +Eval (hcp-train-subset): [30] [ 0/62] eta: 0:04:32 loss: 0.8753 (0.8753) time: 4.3990 data: 4.3106 max mem: 9377 +Eval (hcp-train-subset): [30] [61/62] eta: 0:00:00 loss: 0.8816 (0.8832) time: 0.1319 data: 0.1068 max mem: 9377 +Eval (hcp-train-subset): [30] Total time: 0:00:14 (0.2372 s / it) +Averaged stats (hcp-train-subset): loss: 0.8816 (0.8832) +Eval (hcp-val): [30] [ 0/62] eta: 0:03:49 loss: 0.8821 (0.8821) time: 3.7065 data: 3.6192 max mem: 9377 +Eval (hcp-val): [30] [61/62] eta: 0:00:00 loss: 0.8788 (0.8826) time: 0.1309 data: 0.1055 max mem: 9377 +Eval (hcp-val): [30] Total time: 0:00:15 (0.2488 s / it) +Averaged stats (hcp-val): loss: 0.8788 (0.8826) +Eval (nsd-val): [30] [ 0/62] eta: 0:06:04 loss: 0.8542 (0.8542) time: 5.8713 data: 5.8405 max mem: 9377 +Eval (nsd-val): [30] [61/62] eta: 0:00:00 loss: 0.8643 (0.8650) time: 0.1300 data: 0.1041 max mem: 9377 +Eval (nsd-val): [30] Total time: 0:00:14 (0.2295 s / it) +Averaged stats (nsd-val): loss: 0.8643 (0.8650) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [31] [ 0/6250] eta: 11:19:24 lr: 0.000103 grad: 0.2357 (0.2357) loss: 0.6689 (0.6689) time: 6.5223 data: 6.4291 max mem: 9377 +Train: [31] [ 100/6250] eta: 0:22:25 lr: 0.000103 grad: 0.2533 (0.2811) loss: 0.7346 (0.7527) time: 0.1668 data: 0.0662 max mem: 9377 +Train: [31] [ 200/6250] eta: 0:19:50 lr: 0.000103 grad: 0.2662 (0.3002) loss: 0.7309 (0.7393) time: 0.1644 data: 0.0615 max mem: 9377 +Train: [31] [ 300/6250] eta: 0:18:35 lr: 0.000103 grad: 0.2073 (0.2850) loss: 0.7308 (0.7323) time: 0.1757 data: 0.0836 max mem: 9377 +Train: [31] [ 400/6250] eta: 0:17:29 lr: 0.000103 grad: 0.1876 (0.2633) loss: 0.7131 (0.7284) time: 0.1480 data: 0.0602 max mem: 9377 +Train: [31] [ 500/6250] eta: 0:16:38 lr: 0.000103 grad: 0.1755 (0.2501) loss: 0.7054 (0.7246) time: 0.1523 data: 0.0503 max mem: 9377 +Train: [31] [ 600/6250] eta: 0:16:09 lr: 0.000103 grad: 0.1802 (0.2427) loss: 0.7106 (0.7211) time: 0.1726 data: 0.0709 max mem: 9377 +Train: [31] [ 700/6250] eta: 0:15:38 lr: 0.000103 grad: 0.1688 (0.2340) loss: 0.6841 (0.7184) time: 0.1621 data: 0.0463 max mem: 9377 +Train: [31] [ 800/6250] eta: 0:15:12 lr: 0.000103 grad: 0.1601 (0.2254) loss: 0.7018 (0.7162) time: 0.1848 data: 0.0866 max mem: 9377 +Train: [31] [ 900/6250] eta: 0:14:44 lr: 0.000103 grad: 0.1422 (0.2173) loss: 0.7157 (0.7150) time: 0.1673 data: 0.0646 max mem: 9377 +Train: [31] [1000/6250] eta: 0:14:23 lr: 0.000103 grad: 0.1496 (0.2106) loss: 0.7016 (0.7144) time: 0.1562 data: 0.0662 max mem: 9377 +Train: [31] [1100/6250] eta: 0:14:03 lr: 0.000103 grad: 0.1500 (0.2056) loss: 0.6942 (0.7138) time: 0.1755 data: 0.0807 max mem: 9377 +Train: [31] [1200/6250] eta: 0:13:42 lr: 0.000103 grad: 0.1507 (0.2007) loss: 0.6912 (0.7128) time: 0.1611 data: 0.0734 max mem: 9377 +Train: [31] [1300/6250] eta: 0:13:33 lr: 0.000103 grad: 0.1474 (0.1971) loss: 0.7157 (0.7119) time: 0.1788 data: 0.0784 max mem: 9377 +Train: [31] [1400/6250] eta: 0:13:19 lr: 0.000103 grad: 0.1582 (0.1944) loss: 0.6969 (0.7112) time: 0.1863 data: 0.0865 max mem: 9377 +Train: [31] [1500/6250] eta: 0:13:05 lr: 0.000103 grad: 0.1523 (0.1916) loss: 0.7074 (0.7106) time: 0.1541 data: 0.0695 max mem: 9377 +Train: [31] [1600/6250] eta: 0:12:50 lr: 0.000103 grad: 0.1464 (0.1891) loss: 0.7015 (0.7101) time: 0.1543 data: 0.0667 max mem: 9377 +Train: [31] [1700/6250] eta: 0:12:36 lr: 0.000103 grad: 0.1466 (0.1871) loss: 0.7033 (0.7098) time: 0.1706 data: 0.0747 max mem: 9377 +Train: [31] [1800/6250] eta: 0:12:19 lr: 0.000103 grad: 0.1457 (0.1852) loss: 0.7090 (0.7095) time: 0.1512 data: 0.0640 max mem: 9377 +Train: [31] [1900/6250] eta: 0:12:05 lr: 0.000103 grad: 0.1445 (0.1835) loss: 0.7074 (0.7091) time: 0.1760 data: 0.0822 max mem: 9377 +Train: [31] [2000/6250] eta: 0:11:46 lr: 0.000103 grad: 0.1475 (0.1818) loss: 0.7127 (0.7090) time: 0.1427 data: 0.0442 max mem: 9377 +Train: [31] [2100/6250] eta: 0:11:33 lr: 0.000103 grad: 0.1445 (0.1805) loss: 0.7071 (0.7091) time: 0.1790 data: 0.0800 max mem: 9377 +Train: [31] [2200/6250] eta: 0:11:15 lr: 0.000103 grad: 0.1492 (0.1792) loss: 0.7019 (0.7091) time: 0.1713 data: 0.0876 max mem: 9377 +Train: [31] [2300/6250] eta: 0:10:58 lr: 0.000103 grad: 0.1458 (0.1779) loss: 0.7109 (0.7091) time: 0.1599 data: 0.0663 max mem: 9377 +Train: [31] [2400/6250] eta: 0:10:40 lr: 0.000103 grad: 0.1499 (0.1768) loss: 0.7072 (0.7091) time: 0.1725 data: 0.0743 max mem: 9377 +Train: [31] [2500/6250] eta: 0:10:23 lr: 0.000103 grad: 0.1440 (0.1757) loss: 0.7112 (0.7093) time: 0.1505 data: 0.0596 max mem: 9377 +Train: [31] [2600/6250] eta: 0:10:06 lr: 0.000103 grad: 0.1511 (0.1749) loss: 0.7038 (0.7093) time: 0.1551 data: 0.0546 max mem: 9377 +Train: [31] [2700/6250] eta: 0:09:48 lr: 0.000103 grad: 0.1473 (0.1740) loss: 0.7064 (0.7092) time: 0.1625 data: 0.0684 max mem: 9377 +Train: [31] [2800/6250] eta: 0:09:31 lr: 0.000103 grad: 0.1547 (0.1732) loss: 0.6968 (0.7091) time: 0.1562 data: 0.0706 max mem: 9377 +Train: [31] [2900/6250] eta: 0:09:13 lr: 0.000103 grad: 0.1369 (0.1724) loss: 0.7237 (0.7091) time: 0.1516 data: 0.0632 max mem: 9377 +Train: [31] [3000/6250] eta: 0:08:55 lr: 0.000103 grad: 0.1537 (0.1718) loss: 0.7143 (0.7092) time: 0.1423 data: 0.0490 max mem: 9377 +Train: [31] [3100/6250] eta: 0:08:37 lr: 0.000103 grad: 0.1455 (0.1711) loss: 0.7011 (0.7091) time: 0.1367 data: 0.0384 max mem: 9377 +Train: [31] [3200/6250] eta: 0:08:20 lr: 0.000102 grad: 0.1549 (0.1705) loss: 0.7164 (0.7090) time: 0.1623 data: 0.0764 max mem: 9377 +Train: [31] [3300/6250] eta: 0:08:03 lr: 0.000102 grad: 0.1542 (0.1699) loss: 0.7137 (0.7091) time: 0.1734 data: 0.0868 max mem: 9377 +Train: [31] [3400/6250] eta: 0:07:46 lr: 0.000102 grad: 0.1527 (0.1694) loss: 0.7122 (0.7090) time: 0.1393 data: 0.0564 max mem: 9377 +Train: [31] [3500/6250] eta: 0:07:29 lr: 0.000102 grad: 0.1516 (0.1691) loss: 0.7020 (0.7088) time: 0.1323 data: 0.0400 max mem: 9377 +Train: [31] [3600/6250] eta: 0:07:12 lr: 0.000102 grad: 0.1482 (0.1686) loss: 0.7019 (0.7086) time: 0.1713 data: 0.0878 max mem: 9377 +Train: [31] [3700/6250] eta: 0:06:55 lr: 0.000102 grad: 0.1531 (0.1683) loss: 0.7101 (0.7085) time: 0.1528 data: 0.0561 max mem: 9377 +Train: [31] [3800/6250] eta: 0:06:38 lr: 0.000102 grad: 0.1599 (0.1680) loss: 0.6997 (0.7084) time: 0.1369 data: 0.0474 max mem: 9377 +Train: [31] [3900/6250] eta: 0:06:21 lr: 0.000102 grad: 0.1556 (0.1677) loss: 0.7122 (0.7083) time: 0.1476 data: 0.0600 max mem: 9377 +Train: [31] [4000/6250] eta: 0:06:04 lr: 0.000102 grad: 0.1471 (0.1673) loss: 0.7156 (0.7082) time: 0.1408 data: 0.0491 max mem: 9377 +Train: [31] [4100/6250] eta: 0:05:48 lr: 0.000102 grad: 0.1509 (0.1671) loss: 0.7150 (0.7081) time: 0.1555 data: 0.0650 max mem: 9377 +Train: [31] [4200/6250] eta: 0:05:31 lr: 0.000102 grad: 0.1488 (0.1667) loss: 0.6924 (0.7081) time: 0.1486 data: 0.0565 max mem: 9377 +Train: [31] [4300/6250] eta: 0:05:15 lr: 0.000102 grad: 0.1527 (0.1665) loss: 0.7048 (0.7080) time: 0.1442 data: 0.0496 max mem: 9377 +Train: [31] [4400/6250] eta: 0:04:58 lr: 0.000102 grad: 0.1457 (0.1661) loss: 0.7035 (0.7081) time: 0.1649 data: 0.0768 max mem: 9377 +Train: [31] [4500/6250] eta: 0:04:41 lr: 0.000102 grad: 0.1462 (0.1658) loss: 0.7175 (0.7082) time: 0.1520 data: 0.0632 max mem: 9377 +Train: [31] [4600/6250] eta: 0:04:25 lr: 0.000102 grad: 0.1588 (0.1656) loss: 0.6988 (0.7082) time: 0.1562 data: 0.0727 max mem: 9377 +Train: [31] [4700/6250] eta: 0:04:09 lr: 0.000102 grad: 0.1525 (0.1654) loss: 0.7035 (0.7081) time: 0.1451 data: 0.0573 max mem: 9377 +Train: [31] [4800/6250] eta: 0:03:53 lr: 0.000102 grad: 0.1463 (0.1651) loss: 0.7088 (0.7081) time: 0.1830 data: 0.1021 max mem: 9377 +Train: [31] [4900/6250] eta: 0:03:37 lr: 0.000102 grad: 0.1513 (0.1649) loss: 0.7051 (0.7081) time: 0.1554 data: 0.0686 max mem: 9377 +Train: [31] [5000/6250] eta: 0:03:20 lr: 0.000102 grad: 0.1478 (0.1648) loss: 0.6986 (0.7079) time: 0.1565 data: 0.0625 max mem: 9377 +Train: [31] [5100/6250] eta: 0:03:04 lr: 0.000102 grad: 0.1486 (0.1646) loss: 0.6981 (0.7077) time: 0.1935 data: 0.1171 max mem: 9377 +Train: [31] [5200/6250] eta: 0:02:48 lr: 0.000102 grad: 0.1447 (0.1646) loss: 0.7127 (0.7077) time: 0.1431 data: 0.0495 max mem: 9377 +Train: [31] [5300/6250] eta: 0:02:32 lr: 0.000102 grad: 0.1522 (0.1643) loss: 0.7096 (0.7076) time: 0.1559 data: 0.0647 max mem: 9377 +Train: [31] [5400/6250] eta: 0:02:16 lr: 0.000102 grad: 0.1490 (0.1641) loss: 0.7080 (0.7076) time: 0.1408 data: 0.0439 max mem: 9377 +Train: [31] [5500/6250] eta: 0:02:00 lr: 0.000102 grad: 0.1525 (0.1639) loss: 0.6960 (0.7075) time: 0.1424 data: 0.0597 max mem: 9377 +Train: [31] [5600/6250] eta: 0:01:43 lr: 0.000102 grad: 0.1477 (0.1637) loss: 0.7106 (0.7075) time: 0.1834 data: 0.0943 max mem: 9377 +Train: [31] [5700/6250] eta: 0:01:27 lr: 0.000102 grad: 0.1517 (0.1636) loss: 0.7085 (0.7075) time: 0.1585 data: 0.0702 max mem: 9377 +Train: [31] [5800/6250] eta: 0:01:11 lr: 0.000102 grad: 0.1475 (0.1634) loss: 0.7055 (0.7075) time: 0.1394 data: 0.0505 max mem: 9377 +Train: [31] [5900/6250] eta: 0:00:55 lr: 0.000102 grad: 0.1464 (0.1632) loss: 0.7120 (0.7076) time: 0.1585 data: 0.0732 max mem: 9377 +Train: [31] [6000/6250] eta: 0:00:39 lr: 0.000102 grad: 0.1558 (0.1631) loss: 0.7066 (0.7077) time: 0.1421 data: 0.0594 max mem: 9377 +Train: [31] [6100/6250] eta: 0:00:23 lr: 0.000102 grad: 0.1512 (0.1630) loss: 0.7122 (0.7077) time: 0.1569 data: 0.0697 max mem: 9377 +Train: [31] [6200/6250] eta: 0:00:07 lr: 0.000102 grad: 0.1444 (0.1628) loss: 0.7116 (0.7078) time: 0.1283 data: 0.0359 max mem: 9377 +Train: [31] [6249/6250] eta: 0:00:00 lr: 0.000102 grad: 0.1703 (0.1629) loss: 0.7025 (0.7078) time: 0.1600 data: 0.0754 max mem: 9377 +Train: [31] Total time: 0:16:40 (0.1601 s / it) +Averaged stats: lr: 0.000102 grad: 0.1703 (0.1629) loss: 0.7025 (0.7078) +Eval (hcp-train-subset): [31] [ 0/62] eta: 0:03:37 loss: 0.8733 (0.8733) time: 3.5096 data: 3.4127 max mem: 9377 +Eval (hcp-train-subset): [31] [61/62] eta: 0:00:00 loss: 0.8802 (0.8815) time: 0.1491 data: 0.1240 max mem: 9377 +Eval (hcp-train-subset): [31] Total time: 0:00:14 (0.2332 s / it) +Averaged stats (hcp-train-subset): loss: 0.8802 (0.8815) +Eval (hcp-val): [31] [ 0/62] eta: 0:06:26 loss: 0.8794 (0.8794) time: 6.2370 data: 6.2069 max mem: 9377 +Eval (hcp-val): [31] [61/62] eta: 0:00:00 loss: 0.8773 (0.8808) time: 0.1299 data: 0.1030 max mem: 9377 +Eval (hcp-val): [31] Total time: 0:00:14 (0.2396 s / it) +Averaged stats (hcp-val): loss: 0.8773 (0.8808) +Eval (nsd-val): [31] [ 0/62] eta: 0:03:58 loss: 0.8486 (0.8486) time: 3.8538 data: 3.7714 max mem: 9377 +Eval (nsd-val): [31] [61/62] eta: 0:00:00 loss: 0.8578 (0.8605) time: 0.1439 data: 0.1182 max mem: 9377 +Eval (nsd-val): [31] Total time: 0:00:14 (0.2384 s / it) +Averaged stats (nsd-val): loss: 0.8578 (0.8605) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [32] [ 0/6250] eta: 7:44:56 lr: 0.000102 grad: 0.1321 (0.1321) loss: 0.8425 (0.8425) time: 4.4635 data: 4.1072 max mem: 9377 +Train: [32] [ 100/6250] eta: 0:22:53 lr: 0.000102 grad: 0.3225 (0.3437) loss: 0.6994 (0.7273) time: 0.1729 data: 0.0601 max mem: 9377 +Train: [32] [ 200/6250] eta: 0:20:01 lr: 0.000102 grad: 0.2169 (0.2955) loss: 0.7068 (0.7206) time: 0.1914 data: 0.1007 max mem: 9377 +Train: [32] [ 300/6250] eta: 0:18:34 lr: 0.000102 grad: 0.1877 (0.2666) loss: 0.7051 (0.7164) time: 0.1679 data: 0.0795 max mem: 9377 +Train: [32] [ 400/6250] eta: 0:17:50 lr: 0.000102 grad: 0.1870 (0.2509) loss: 0.7178 (0.7137) time: 0.1760 data: 0.0838 max mem: 9377 +Train: [32] [ 500/6250] eta: 0:17:05 lr: 0.000102 grad: 0.2369 (0.2437) loss: 0.7037 (0.7134) time: 0.1517 data: 0.0666 max mem: 9377 +Train: [32] [ 600/6250] eta: 0:16:48 lr: 0.000102 grad: 0.2459 (0.2465) loss: 0.6986 (0.7133) time: 0.1995 data: 0.0856 max mem: 9377 +Train: [32] [ 700/6250] eta: 0:16:35 lr: 0.000102 grad: 0.2131 (0.2435) loss: 0.7071 (0.7130) time: 0.1669 data: 0.0574 max mem: 9377 +Train: [32] [ 800/6250] eta: 0:16:24 lr: 0.000101 grad: 0.1727 (0.2385) loss: 0.7008 (0.7120) time: 0.2099 data: 0.1057 max mem: 9377 +Train: [32] [ 900/6250] eta: 0:16:03 lr: 0.000101 grad: 0.1603 (0.2306) loss: 0.6985 (0.7112) time: 0.1651 data: 0.0614 max mem: 9377 +Train: [32] [1000/6250] eta: 0:15:36 lr: 0.000101 grad: 0.1560 (0.2233) loss: 0.7219 (0.7108) time: 0.1524 data: 0.0542 max mem: 9377 +Train: [32] [1100/6250] eta: 0:15:11 lr: 0.000101 grad: 0.1604 (0.2174) loss: 0.7198 (0.7109) time: 0.1774 data: 0.0731 max mem: 9377 +Train: [32] [1200/6250] eta: 0:14:43 lr: 0.000101 grad: 0.1552 (0.2126) loss: 0.6966 (0.7104) time: 0.1636 data: 0.0661 max mem: 9377 +Train: [32] [1300/6250] eta: 0:14:24 lr: 0.000101 grad: 0.1530 (0.2084) loss: 0.7006 (0.7097) time: 0.1589 data: 0.0830 max mem: 9377 +Train: [32] [1400/6250] eta: 0:14:03 lr: 0.000101 grad: 0.1538 (0.2046) loss: 0.7155 (0.7094) time: 0.1583 data: 0.0665 max mem: 9377 +Train: [32] [1500/6250] eta: 0:13:45 lr: 0.000101 grad: 0.1471 (0.2010) loss: 0.7255 (0.7094) time: 0.1822 data: 0.0890 max mem: 9377 +Train: [32] [1600/6250] eta: 0:13:24 lr: 0.000101 grad: 0.1529 (0.1980) loss: 0.7110 (0.7092) time: 0.1432 data: 0.0572 max mem: 9377 +Train: [32] [1700/6250] eta: 0:13:07 lr: 0.000101 grad: 0.1505 (0.1952) loss: 0.7058 (0.7090) time: 0.1597 data: 0.0602 max mem: 9377 +Train: [32] [1800/6250] eta: 0:12:47 lr: 0.000101 grad: 0.1494 (0.1929) loss: 0.7069 (0.7088) time: 0.1474 data: 0.0506 max mem: 9377 +Train: [32] [1900/6250] eta: 0:12:29 lr: 0.000101 grad: 0.1478 (0.1909) loss: 0.6931 (0.7086) time: 0.1672 data: 0.0690 max mem: 9377 +Train: [32] [2000/6250] eta: 0:12:09 lr: 0.000101 grad: 0.1675 (0.1894) loss: 0.6976 (0.7084) time: 0.1591 data: 0.0573 max mem: 9377 +Train: [32] [2100/6250] eta: 0:11:48 lr: 0.000101 grad: 0.1464 (0.1876) loss: 0.7026 (0.7083) time: 0.1447 data: 0.0526 max mem: 9377 +Train: [32] [2200/6250] eta: 0:11:32 lr: 0.000101 grad: 0.1481 (0.1859) loss: 0.6999 (0.7079) time: 0.1666 data: 0.0792 max mem: 9377 +Train: [32] [2300/6250] eta: 0:11:15 lr: 0.000101 grad: 0.1572 (0.1848) loss: 0.7068 (0.7077) time: 0.1600 data: 0.0718 max mem: 9377 +Train: [32] [2400/6250] eta: 0:10:57 lr: 0.000101 grad: 0.1524 (0.1837) loss: 0.6891 (0.7074) time: 0.1395 data: 0.0546 max mem: 9377 +Train: [32] [2500/6250] eta: 0:10:42 lr: 0.000101 grad: 0.1519 (0.1825) loss: 0.6978 (0.7072) time: 0.1691 data: 0.0611 max mem: 9377 +Train: [32] [2600/6250] eta: 0:10:24 lr: 0.000101 grad: 0.1569 (0.1815) loss: 0.6982 (0.7070) time: 0.1784 data: 0.0923 max mem: 9377 +Train: [32] [2700/6250] eta: 0:10:05 lr: 0.000101 grad: 0.1488 (0.1804) loss: 0.7124 (0.7070) time: 0.1628 data: 0.0660 max mem: 9377 +Train: [32] [2800/6250] eta: 0:09:47 lr: 0.000101 grad: 0.1455 (0.1795) loss: 0.7142 (0.7069) time: 0.1507 data: 0.0598 max mem: 9377 +Train: [32] [2900/6250] eta: 0:09:28 lr: 0.000101 grad: 0.1509 (0.1786) loss: 0.7103 (0.7070) time: 0.1509 data: 0.0563 max mem: 9377 +Train: [32] [3000/6250] eta: 0:09:09 lr: 0.000101 grad: 0.1610 (0.1778) loss: 0.6871 (0.7068) time: 0.1351 data: 0.0428 max mem: 9377 +Train: [32] [3100/6250] eta: 0:08:50 lr: 0.000101 grad: 0.1544 (0.1771) loss: 0.6825 (0.7067) time: 0.1413 data: 0.0485 max mem: 9377 +Train: [32] [3200/6250] eta: 0:08:33 lr: 0.000101 grad: 0.1512 (0.1764) loss: 0.7055 (0.7066) time: 0.1579 data: 0.0579 max mem: 9377 +Train: [32] [3300/6250] eta: 0:08:14 lr: 0.000101 grad: 0.1475 (0.1758) loss: 0.7074 (0.7064) time: 0.1383 data: 0.0525 max mem: 9377 +Train: [32] [3400/6250] eta: 0:07:56 lr: 0.000101 grad: 0.1562 (0.1752) loss: 0.6963 (0.7063) time: 0.1567 data: 0.0618 max mem: 9377 +Train: [32] [3500/6250] eta: 0:07:38 lr: 0.000101 grad: 0.1495 (0.1746) loss: 0.7154 (0.7064) time: 0.1525 data: 0.0686 max mem: 9377 +Train: [32] [3600/6250] eta: 0:07:21 lr: 0.000101 grad: 0.1576 (0.1741) loss: 0.6976 (0.7064) time: 0.1557 data: 0.0689 max mem: 9377 +Train: [32] [3700/6250] eta: 0:07:03 lr: 0.000101 grad: 0.1566 (0.1737) loss: 0.6972 (0.7064) time: 0.1473 data: 0.0641 max mem: 9377 +Train: [32] [3800/6250] eta: 0:06:46 lr: 0.000101 grad: 0.1494 (0.1731) loss: 0.7175 (0.7066) time: 0.1693 data: 0.0840 max mem: 9377 +Train: [32] [3900/6250] eta: 0:06:29 lr: 0.000101 grad: 0.1598 (0.1727) loss: 0.6989 (0.7067) time: 0.2356 data: 0.1501 max mem: 9377 +Train: [32] [4000/6250] eta: 0:06:11 lr: 0.000101 grad: 0.1564 (0.1724) loss: 0.7031 (0.7067) time: 0.1542 data: 0.0628 max mem: 9377 +Train: [32] [4100/6250] eta: 0:05:54 lr: 0.000101 grad: 0.1513 (0.1720) loss: 0.7170 (0.7066) time: 0.1446 data: 0.0622 max mem: 9377 +Train: [32] [4200/6250] eta: 0:05:37 lr: 0.000101 grad: 0.1465 (0.1716) loss: 0.7130 (0.7066) time: 0.1528 data: 0.0559 max mem: 9377 +Train: [32] [4300/6250] eta: 0:05:20 lr: 0.000101 grad: 0.1519 (0.1711) loss: 0.7023 (0.7066) time: 0.1472 data: 0.0628 max mem: 9377 +Train: [32] [4400/6250] eta: 0:05:03 lr: 0.000101 grad: 0.1567 (0.1707) loss: 0.7095 (0.7066) time: 0.1729 data: 0.0855 max mem: 9377 +Train: [32] [4500/6250] eta: 0:04:47 lr: 0.000101 grad: 0.1525 (0.1703) loss: 0.7032 (0.7066) time: 0.1403 data: 0.0442 max mem: 9377 +Train: [32] [4600/6250] eta: 0:04:30 lr: 0.000101 grad: 0.1518 (0.1699) loss: 0.6982 (0.7066) time: 0.1434 data: 0.0567 max mem: 9377 +Train: [32] [4700/6250] eta: 0:04:13 lr: 0.000100 grad: 0.1554 (0.1697) loss: 0.7096 (0.7067) time: 0.1351 data: 0.0428 max mem: 9377 +Train: [32] [4800/6250] eta: 0:03:56 lr: 0.000100 grad: 0.1551 (0.1694) loss: 0.7012 (0.7067) time: 0.1675 data: 0.0772 max mem: 9377 +Train: [32] [4900/6250] eta: 0:03:40 lr: 0.000100 grad: 0.1552 (0.1691) loss: 0.6938 (0.7066) time: 0.1764 data: 0.0873 max mem: 9377 +Train: [32] [5000/6250] eta: 0:03:23 lr: 0.000100 grad: 0.1513 (0.1687) loss: 0.7122 (0.7066) time: 0.1646 data: 0.0788 max mem: 9377 +Train: [32] [5100/6250] eta: 0:03:07 lr: 0.000100 grad: 0.1460 (0.1684) loss: 0.6983 (0.7067) time: 0.1583 data: 0.0829 max mem: 9377 +Train: [32] [5200/6250] eta: 0:02:51 lr: 0.000100 grad: 0.1560 (0.1680) loss: 0.6986 (0.7067) time: 0.1619 data: 0.0796 max mem: 9377 +Train: [32] [5300/6250] eta: 0:02:34 lr: 0.000100 grad: 0.1490 (0.1677) loss: 0.7164 (0.7068) time: 0.1588 data: 0.0678 max mem: 9377 +Train: [32] [5400/6250] eta: 0:02:18 lr: 0.000100 grad: 0.1477 (0.1674) loss: 0.7154 (0.7070) time: 0.1464 data: 0.0575 max mem: 9377 +Train: [32] [5500/6250] eta: 0:02:01 lr: 0.000100 grad: 0.1514 (0.1671) loss: 0.7063 (0.7071) time: 0.1529 data: 0.0663 max mem: 9377 +Train: [32] [5600/6250] eta: 0:01:45 lr: 0.000100 grad: 0.1490 (0.1668) loss: 0.7109 (0.7072) time: 0.1446 data: 0.0492 max mem: 9377 +Train: [32] [5700/6250] eta: 0:01:29 lr: 0.000100 grad: 0.1490 (0.1665) loss: 0.7250 (0.7074) time: 0.1472 data: 0.0601 max mem: 9377 +Train: [32] [5800/6250] eta: 0:01:12 lr: 0.000100 grad: 0.1504 (0.1663) loss: 0.7113 (0.7076) time: 0.1453 data: 0.0626 max mem: 9377 +Train: [32] [5900/6250] eta: 0:00:56 lr: 0.000100 grad: 0.1437 (0.1660) loss: 0.7130 (0.7077) time: 0.0945 data: 0.0065 max mem: 9377 +Train: [32] [6000/6250] eta: 0:00:40 lr: 0.000100 grad: 0.1504 (0.1658) loss: 0.7099 (0.7077) time: 0.1262 data: 0.0313 max mem: 9377 +Train: [32] [6100/6250] eta: 0:00:24 lr: 0.000100 grad: 0.1542 (0.1655) loss: 0.7148 (0.7078) time: 0.1521 data: 0.0614 max mem: 9377 +Train: [32] [6200/6250] eta: 0:00:08 lr: 0.000100 grad: 0.1526 (0.1653) loss: 0.7091 (0.7078) time: 0.1452 data: 0.0600 max mem: 9377 +Train: [32] [6249/6250] eta: 0:00:00 lr: 0.000100 grad: 0.1596 (0.1653) loss: 0.7068 (0.7078) time: 0.1246 data: 0.0333 max mem: 9377 +Train: [32] Total time: 0:16:56 (0.1626 s / it) +Averaged stats: lr: 0.000100 grad: 0.1596 (0.1653) loss: 0.7068 (0.7078) +Eval (hcp-train-subset): [32] [ 0/62] eta: 0:06:26 loss: 0.8704 (0.8704) time: 6.2403 data: 6.2096 max mem: 9377 +Eval (hcp-train-subset): [32] [61/62] eta: 0:00:00 loss: 0.8794 (0.8800) time: 0.1309 data: 0.1041 max mem: 9377 +Eval (hcp-train-subset): [32] Total time: 0:00:14 (0.2351 s / it) +Averaged stats (hcp-train-subset): loss: 0.8794 (0.8800) +Eval (hcp-val): [32] [ 0/62] eta: 0:05:31 loss: 0.8838 (0.8838) time: 5.3506 data: 5.3063 max mem: 9377 +Eval (hcp-val): [32] [61/62] eta: 0:00:00 loss: 0.8754 (0.8782) time: 0.1514 data: 0.1264 max mem: 9377 +Eval (hcp-val): [32] Total time: 0:00:14 (0.2378 s / it) +Averaged stats (hcp-val): loss: 0.8754 (0.8782) +Eval (nsd-val): [32] [ 0/62] eta: 0:03:19 loss: 0.8531 (0.8531) time: 3.2108 data: 3.1299 max mem: 9377 +Eval (nsd-val): [32] [61/62] eta: 0:00:00 loss: 0.8592 (0.8590) time: 0.1597 data: 0.1333 max mem: 9377 +Eval (nsd-val): [32] Total time: 0:00:14 (0.2415 s / it) +Averaged stats (nsd-val): loss: 0.8592 (0.8590) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [33] [ 0/6250] eta: 11:50:49 lr: 0.000100 grad: 0.2406 (0.2406) loss: 0.5636 (0.5636) time: 6.8239 data: 6.6659 max mem: 9377 +Train: [33] [ 100/6250] eta: 0:23:48 lr: 0.000100 grad: 0.2981 (0.3186) loss: 0.7251 (0.7259) time: 0.1776 data: 0.0676 max mem: 9377 +Train: [33] [ 200/6250] eta: 0:21:07 lr: 0.000100 grad: 0.2100 (0.3127) loss: 0.7437 (0.7331) time: 0.1620 data: 0.0581 max mem: 9377 +Train: [33] [ 300/6250] eta: 0:20:21 lr: 0.000100 grad: 0.1826 (0.2784) loss: 0.7438 (0.7312) time: 0.2242 data: 0.1337 max mem: 9377 +Train: [33] [ 400/6250] eta: 0:19:20 lr: 0.000100 grad: 0.2112 (0.2641) loss: 0.7146 (0.7282) time: 0.1705 data: 0.0653 max mem: 9377 +Train: [33] [ 500/6250] eta: 0:18:44 lr: 0.000100 grad: 0.1902 (0.2533) loss: 0.7190 (0.7258) time: 0.2153 data: 0.1275 max mem: 9377 +Train: [33] [ 600/6250] eta: 0:18:05 lr: 0.000100 grad: 0.2513 (0.2478) loss: 0.7098 (0.7226) time: 0.1673 data: 0.0621 max mem: 9377 +Train: [33] [ 700/6250] eta: 0:17:42 lr: 0.000100 grad: 0.2010 (0.2433) loss: 0.7172 (0.7208) time: 0.1793 data: 0.0883 max mem: 9377 +Train: [33] [ 800/6250] eta: 0:17:18 lr: 0.000100 grad: 0.1793 (0.2351) loss: 0.6964 (0.7194) time: 0.2239 data: 0.1269 max mem: 9377 +Train: [33] [ 900/6250] eta: 0:16:47 lr: 0.000100 grad: 0.1571 (0.2276) loss: 0.7090 (0.7178) time: 0.1661 data: 0.0685 max mem: 9377 +Train: [33] [1000/6250] eta: 0:16:14 lr: 0.000100 grad: 0.1636 (0.2212) loss: 0.6890 (0.7167) time: 0.1587 data: 0.0615 max mem: 9377 +Train: [33] [1100/6250] eta: 0:15:42 lr: 0.000100 grad: 0.1568 (0.2157) loss: 0.7081 (0.7152) time: 0.1557 data: 0.0708 max mem: 9377 +Train: [33] [1200/6250] eta: 0:15:14 lr: 0.000100 grad: 0.1631 (0.2110) loss: 0.6892 (0.7137) time: 0.1586 data: 0.0695 max mem: 9377 +Train: [33] [1300/6250] eta: 0:14:51 lr: 0.000100 grad: 0.1530 (0.2069) loss: 0.6980 (0.7127) time: 0.1563 data: 0.0752 max mem: 9377 +Train: [33] [1400/6250] eta: 0:14:29 lr: 0.000100 grad: 0.1462 (0.2031) loss: 0.7076 (0.7117) time: 0.1760 data: 0.0845 max mem: 9377 +Train: [33] [1500/6250] eta: 0:14:06 lr: 0.000100 grad: 0.1545 (0.1999) loss: 0.6952 (0.7106) time: 0.1439 data: 0.0532 max mem: 9377 +Train: [33] [1600/6250] eta: 0:13:39 lr: 0.000100 grad: 0.1640 (0.1971) loss: 0.6826 (0.7095) time: 0.1455 data: 0.0574 max mem: 9377 +Train: [33] [1700/6250] eta: 0:13:20 lr: 0.000100 grad: 0.1535 (0.1946) loss: 0.6940 (0.7082) time: 0.1716 data: 0.0876 max mem: 9377 +Train: [33] [1800/6250] eta: 0:12:59 lr: 0.000100 grad: 0.1544 (0.1925) loss: 0.6915 (0.7072) time: 0.1470 data: 0.0566 max mem: 9377 +Train: [33] [1900/6250] eta: 0:12:38 lr: 0.000100 grad: 0.1494 (0.1906) loss: 0.6868 (0.7063) time: 0.1660 data: 0.0740 max mem: 9377 +Train: [33] [2000/6250] eta: 0:12:17 lr: 0.000100 grad: 0.1437 (0.1887) loss: 0.7139 (0.7059) time: 0.1261 data: 0.0317 max mem: 9377 +Train: [33] [2100/6250] eta: 0:11:56 lr: 0.000100 grad: 0.1534 (0.1870) loss: 0.6954 (0.7055) time: 0.1435 data: 0.0456 max mem: 9377 +Train: [33] [2200/6250] eta: 0:11:38 lr: 0.000099 grad: 0.1556 (0.1857) loss: 0.6892 (0.7049) time: 0.1768 data: 0.0896 max mem: 9377 +Train: [33] [2300/6250] eta: 0:11:17 lr: 0.000099 grad: 0.1558 (0.1844) loss: 0.6850 (0.7045) time: 0.1690 data: 0.0765 max mem: 9377 +Train: [33] [2400/6250] eta: 0:10:58 lr: 0.000099 grad: 0.1486 (0.1832) loss: 0.6975 (0.7044) time: 0.1776 data: 0.0895 max mem: 9377 +Train: [33] [2500/6250] eta: 0:10:38 lr: 0.000099 grad: 0.1406 (0.1818) loss: 0.7041 (0.7045) time: 0.1376 data: 0.0491 max mem: 9377 +Train: [33] [2600/6250] eta: 0:10:21 lr: 0.000099 grad: 0.1555 (0.1810) loss: 0.7006 (0.7044) time: 0.1576 data: 0.0549 max mem: 9377 +Train: [33] [2700/6250] eta: 0:10:03 lr: 0.000099 grad: 0.1537 (0.1800) loss: 0.6949 (0.7043) time: 0.1701 data: 0.0734 max mem: 9377 +Train: [33] [2800/6250] eta: 0:09:45 lr: 0.000099 grad: 0.1521 (0.1791) loss: 0.6997 (0.7042) time: 0.1629 data: 0.0637 max mem: 9377 +Train: [33] [2900/6250] eta: 0:09:25 lr: 0.000099 grad: 0.1546 (0.1783) loss: 0.7137 (0.7042) time: 0.1467 data: 0.0594 max mem: 9377 +Train: [33] [3000/6250] eta: 0:09:07 lr: 0.000099 grad: 0.1759 (0.1776) loss: 0.6978 (0.7041) time: 0.1688 data: 0.0834 max mem: 9377 +Train: [33] [3100/6250] eta: 0:08:48 lr: 0.000099 grad: 0.1502 (0.1770) loss: 0.7000 (0.7040) time: 0.1449 data: 0.0654 max mem: 9377 +Train: [33] [3200/6250] eta: 0:08:30 lr: 0.000099 grad: 0.1637 (0.1764) loss: 0.6918 (0.7039) time: 0.1533 data: 0.0704 max mem: 9377 +Train: [33] [3300/6250] eta: 0:08:13 lr: 0.000099 grad: 0.1536 (0.1758) loss: 0.6916 (0.7039) time: 0.1920 data: 0.1195 max mem: 9377 +Train: [33] [3400/6250] eta: 0:07:57 lr: 0.000099 grad: 0.1481 (0.1751) loss: 0.7061 (0.7039) time: 0.1522 data: 0.0497 max mem: 9377 +Train: [33] [3500/6250] eta: 0:07:40 lr: 0.000099 grad: 0.1590 (0.1746) loss: 0.6948 (0.7039) time: 0.1560 data: 0.0754 max mem: 9377 +Train: [33] [3600/6250] eta: 0:07:24 lr: 0.000099 grad: 0.1503 (0.1740) loss: 0.7079 (0.7038) time: 0.1186 data: 0.0274 max mem: 9377 +Train: [33] [3700/6250] eta: 0:07:07 lr: 0.000099 grad: 0.1531 (0.1735) loss: 0.6958 (0.7038) time: 0.1531 data: 0.0666 max mem: 9377 +Train: [33] [3800/6250] eta: 0:06:49 lr: 0.000099 grad: 0.1453 (0.1730) loss: 0.7013 (0.7036) time: 0.1486 data: 0.0671 max mem: 9377 +Train: [33] [3900/6250] eta: 0:06:31 lr: 0.000099 grad: 0.1555 (0.1726) loss: 0.6903 (0.7034) time: 0.1515 data: 0.0679 max mem: 9377 +Train: [33] [4000/6250] eta: 0:06:15 lr: 0.000099 grad: 0.1571 (0.1722) loss: 0.6987 (0.7034) time: 0.1887 data: 0.1051 max mem: 9377 +Train: [33] [4100/6250] eta: 0:05:57 lr: 0.000099 grad: 0.1589 (0.1718) loss: 0.6924 (0.7031) time: 0.1778 data: 0.0916 max mem: 9377 +Train: [33] [4200/6250] eta: 0:05:40 lr: 0.000099 grad: 0.1448 (0.1714) loss: 0.7169 (0.7032) time: 0.1675 data: 0.0818 max mem: 9377 +Train: [33] [4300/6250] eta: 0:05:24 lr: 0.000099 grad: 0.1548 (0.1710) loss: 0.6896 (0.7030) time: 0.1622 data: 0.0734 max mem: 9377 +Train: [33] [4400/6250] eta: 0:05:07 lr: 0.000099 grad: 0.1493 (0.1706) loss: 0.6907 (0.7030) time: 0.1637 data: 0.0753 max mem: 9377 +Train: [33] [4500/6250] eta: 0:04:50 lr: 0.000099 grad: 0.1604 (0.1703) loss: 0.7001 (0.7030) time: 0.1485 data: 0.0658 max mem: 9377 +Train: [33] [4600/6250] eta: 0:04:33 lr: 0.000099 grad: 0.1598 (0.1701) loss: 0.6890 (0.7031) time: 0.1621 data: 0.0745 max mem: 9377 +Train: [33] [4700/6250] eta: 0:04:16 lr: 0.000099 grad: 0.1573 (0.1699) loss: 0.6976 (0.7030) time: 0.1471 data: 0.0494 max mem: 9377 +Train: [33] [4800/6250] eta: 0:03:59 lr: 0.000099 grad: 0.1583 (0.1697) loss: 0.6966 (0.7030) time: 0.1755 data: 0.0932 max mem: 9377 +Train: [33] [4900/6250] eta: 0:03:42 lr: 0.000099 grad: 0.1511 (0.1694) loss: 0.6979 (0.7029) time: 0.1625 data: 0.0700 max mem: 9377 +Train: [33] [5000/6250] eta: 0:03:25 lr: 0.000099 grad: 0.1548 (0.1691) loss: 0.7059 (0.7029) time: 0.1366 data: 0.0510 max mem: 9377 +Train: [33] [5100/6250] eta: 0:03:09 lr: 0.000099 grad: 0.1483 (0.1688) loss: 0.7087 (0.7029) time: 0.1286 data: 0.0356 max mem: 9377 +Train: [33] [5200/6250] eta: 0:02:52 lr: 0.000099 grad: 0.1515 (0.1686) loss: 0.6937 (0.7029) time: 0.1556 data: 0.0692 max mem: 9377 +Train: [33] [5300/6250] eta: 0:02:35 lr: 0.000099 grad: 0.1534 (0.1683) loss: 0.7054 (0.7030) time: 0.1599 data: 0.0799 max mem: 9377 +Train: [33] [5400/6250] eta: 0:02:19 lr: 0.000099 grad: 0.1607 (0.1682) loss: 0.7093 (0.7031) time: 0.1556 data: 0.0654 max mem: 9377 +Train: [33] [5500/6250] eta: 0:02:02 lr: 0.000099 grad: 0.1549 (0.1680) loss: 0.6983 (0.7031) time: 0.1346 data: 0.0322 max mem: 9377 +Train: [33] [5600/6250] eta: 0:01:46 lr: 0.000099 grad: 0.1486 (0.1678) loss: 0.7118 (0.7032) time: 0.1581 data: 0.0667 max mem: 9377 +Train: [33] [5700/6250] eta: 0:01:29 lr: 0.000099 grad: 0.1586 (0.1677) loss: 0.6966 (0.7033) time: 0.1456 data: 0.0563 max mem: 9377 +Train: [33] [5800/6250] eta: 0:01:13 lr: 0.000099 grad: 0.1581 (0.1675) loss: 0.7073 (0.7033) time: 0.1690 data: 0.0709 max mem: 9377 +Train: [33] [5900/6250] eta: 0:00:57 lr: 0.000098 grad: 0.1514 (0.1673) loss: 0.7040 (0.7033) time: 0.1947 data: 0.1074 max mem: 9377 +Train: [33] [6000/6250] eta: 0:00:40 lr: 0.000098 grad: 0.1591 (0.1671) loss: 0.6987 (0.7032) time: 0.1309 data: 0.0336 max mem: 9377 +Train: [33] [6100/6250] eta: 0:00:24 lr: 0.000098 grad: 0.1539 (0.1670) loss: 0.6960 (0.7032) time: 0.1571 data: 0.0601 max mem: 9377 +Train: [33] [6200/6250] eta: 0:00:08 lr: 0.000098 grad: 0.1545 (0.1668) loss: 0.6936 (0.7031) time: 0.1725 data: 0.0858 max mem: 9377 +Train: [33] [6249/6250] eta: 0:00:00 lr: 0.000098 grad: 0.1483 (0.1667) loss: 0.7037 (0.7031) time: 0.1710 data: 0.0878 max mem: 9377 +Train: [33] Total time: 0:17:05 (0.1641 s / it) +Averaged stats: lr: 0.000098 grad: 0.1483 (0.1667) loss: 0.7037 (0.7031) +Eval (hcp-train-subset): [33] [ 0/62] eta: 0:06:32 loss: 0.8741 (0.8741) time: 6.3371 data: 6.3061 max mem: 9377 +Eval (hcp-train-subset): [33] [61/62] eta: 0:00:00 loss: 0.8821 (0.8860) time: 0.1668 data: 0.1416 max mem: 9377 +Eval (hcp-train-subset): [33] Total time: 0:00:15 (0.2506 s / it) +Averaged stats (hcp-train-subset): loss: 0.8821 (0.8860) +Eval (hcp-val): [33] [ 0/62] eta: 0:04:04 loss: 0.8847 (0.8847) time: 3.9515 data: 3.8875 max mem: 9377 +Eval (hcp-val): [33] [61/62] eta: 0:00:00 loss: 0.8830 (0.8836) time: 0.1483 data: 0.1216 max mem: 9377 +Eval (hcp-val): [33] Total time: 0:00:14 (0.2363 s / it) +Averaged stats (hcp-val): loss: 0.8830 (0.8836) +Eval (nsd-val): [33] [ 0/62] eta: 0:05:19 loss: 0.8498 (0.8498) time: 5.1454 data: 5.1099 max mem: 9377 +Eval (nsd-val): [33] [61/62] eta: 0:00:00 loss: 0.8625 (0.8647) time: 0.1473 data: 0.1203 max mem: 9377 +Eval (nsd-val): [33] Total time: 0:00:15 (0.2446 s / it) +Averaged stats (nsd-val): loss: 0.8625 (0.8647) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [34] [ 0/6250] eta: 10:37:25 lr: 0.000098 grad: 0.1802 (0.1802) loss: 0.7776 (0.7776) time: 6.1193 data: 5.9360 max mem: 9377 +Train: [34] [ 100/6250] eta: 0:25:03 lr: 0.000098 grad: 0.3490 (0.3412) loss: 0.6978 (0.7314) time: 0.1666 data: 0.0508 max mem: 9377 +Train: [34] [ 200/6250] eta: 0:21:49 lr: 0.000098 grad: 0.2336 (0.3310) loss: 0.7056 (0.7192) time: 0.1984 data: 0.1006 max mem: 9377 +Train: [34] [ 300/6250] eta: 0:19:50 lr: 0.000098 grad: 0.2439 (0.3050) loss: 0.7188 (0.7161) time: 0.1682 data: 0.0746 max mem: 9377 +Train: [34] [ 400/6250] eta: 0:18:59 lr: 0.000098 grad: 0.2147 (0.2928) loss: 0.7147 (0.7140) time: 0.1678 data: 0.0755 max mem: 9377 +Train: [34] [ 500/6250] eta: 0:18:36 lr: 0.000098 grad: 0.2086 (0.2776) loss: 0.6997 (0.7122) time: 0.1842 data: 0.0964 max mem: 9377 +Train: [34] [ 600/6250] eta: 0:18:01 lr: 0.000098 grad: 0.2312 (0.2736) loss: 0.7060 (0.7097) time: 0.1742 data: 0.0872 max mem: 9377 +Train: [34] [ 700/6250] eta: 0:17:32 lr: 0.000098 grad: 0.2031 (0.2651) loss: 0.7055 (0.7082) time: 0.1888 data: 0.0870 max mem: 9377 +Train: [34] [ 800/6250] eta: 0:17:02 lr: 0.000098 grad: 0.1959 (0.2588) loss: 0.7221 (0.7083) time: 0.1779 data: 0.0791 max mem: 9377 +Train: [34] [ 900/6250] eta: 0:16:31 lr: 0.000098 grad: 0.1709 (0.2510) loss: 0.7063 (0.7078) time: 0.1693 data: 0.0649 max mem: 9377 +Train: [34] [1000/6250] eta: 0:16:00 lr: 0.000098 grad: 0.1604 (0.2439) loss: 0.7148 (0.7078) time: 0.1585 data: 0.0706 max mem: 9377 +Train: [34] [1100/6250] eta: 0:15:29 lr: 0.000098 grad: 0.1590 (0.2362) loss: 0.6963 (0.7075) time: 0.1677 data: 0.0798 max mem: 9377 +Train: [34] [1200/6250] eta: 0:14:58 lr: 0.000098 grad: 0.1475 (0.2294) loss: 0.7095 (0.7069) time: 0.1466 data: 0.0627 max mem: 9377 +Train: [34] [1300/6250] eta: 0:14:30 lr: 0.000098 grad: 0.1561 (0.2240) loss: 0.6986 (0.7063) time: 0.1494 data: 0.0551 max mem: 9377 +Train: [34] [1400/6250] eta: 0:14:12 lr: 0.000098 grad: 0.1445 (0.2191) loss: 0.7278 (0.7062) time: 0.1825 data: 0.0888 max mem: 9377 +Train: [34] [1500/6250] eta: 0:13:49 lr: 0.000098 grad: 0.1475 (0.2149) loss: 0.7166 (0.7063) time: 0.1632 data: 0.0758 max mem: 9377 +Train: [34] [1600/6250] eta: 0:13:27 lr: 0.000098 grad: 0.1489 (0.2114) loss: 0.6851 (0.7058) time: 0.1699 data: 0.0841 max mem: 9377 +Train: [34] [1700/6250] eta: 0:13:03 lr: 0.000098 grad: 0.1554 (0.2081) loss: 0.6974 (0.7053) time: 0.1596 data: 0.0818 max mem: 9377 +Train: [34] [1800/6250] eta: 0:12:45 lr: 0.000098 grad: 0.1485 (0.2052) loss: 0.6868 (0.7048) time: 0.1485 data: 0.0508 max mem: 9377 +Train: [34] [1900/6250] eta: 0:12:25 lr: 0.000098 grad: 0.1584 (0.2025) loss: 0.6930 (0.7044) time: 0.1503 data: 0.0504 max mem: 9377 +Train: [34] [2000/6250] eta: 0:12:04 lr: 0.000098 grad: 0.1560 (0.2002) loss: 0.6909 (0.7040) time: 0.1683 data: 0.0827 max mem: 9377 +Train: [34] [2100/6250] eta: 0:11:44 lr: 0.000098 grad: 0.1641 (0.1981) loss: 0.6845 (0.7036) time: 0.1458 data: 0.0587 max mem: 9377 +Train: [34] [2200/6250] eta: 0:11:25 lr: 0.000098 grad: 0.1629 (0.1964) loss: 0.6856 (0.7033) time: 0.1662 data: 0.0750 max mem: 9377 +Train: [34] [2300/6250] eta: 0:11:08 lr: 0.000098 grad: 0.1495 (0.1946) loss: 0.7014 (0.7032) time: 0.1492 data: 0.0660 max mem: 9377 +Train: [34] [2400/6250] eta: 0:10:51 lr: 0.000098 grad: 0.1463 (0.1928) loss: 0.6934 (0.7032) time: 0.1886 data: 0.1044 max mem: 9377 +Train: [34] [2500/6250] eta: 0:10:32 lr: 0.000098 grad: 0.1504 (0.1914) loss: 0.7028 (0.7030) time: 0.1661 data: 0.0843 max mem: 9377 +Train: [34] [2600/6250] eta: 0:10:12 lr: 0.000098 grad: 0.1499 (0.1901) loss: 0.7054 (0.7028) time: 0.1266 data: 0.0374 max mem: 9377 +Train: [34] [2700/6250] eta: 0:09:53 lr: 0.000098 grad: 0.1487 (0.1887) loss: 0.6919 (0.7027) time: 0.1318 data: 0.0395 max mem: 9377 +Train: [34] [2800/6250] eta: 0:09:35 lr: 0.000098 grad: 0.1510 (0.1875) loss: 0.7038 (0.7026) time: 0.1414 data: 0.0511 max mem: 9377 +Train: [34] [2900/6250] eta: 0:09:16 lr: 0.000098 grad: 0.1653 (0.1863) loss: 0.6915 (0.7027) time: 0.1440 data: 0.0508 max mem: 9377 +Train: [34] [3000/6250] eta: 0:08:57 lr: 0.000098 grad: 0.1514 (0.1853) loss: 0.7089 (0.7026) time: 0.1429 data: 0.0526 max mem: 9377 +Train: [34] [3100/6250] eta: 0:08:39 lr: 0.000098 grad: 0.1465 (0.1842) loss: 0.6985 (0.7027) time: 0.1270 data: 0.0331 max mem: 9377 +Train: [34] [3200/6250] eta: 0:08:21 lr: 0.000098 grad: 0.1516 (0.1833) loss: 0.6995 (0.7027) time: 0.1575 data: 0.0647 max mem: 9377 +Train: [34] [3300/6250] eta: 0:08:04 lr: 0.000097 grad: 0.1474 (0.1823) loss: 0.7170 (0.7027) time: 0.1741 data: 0.0773 max mem: 9377 +Train: [34] [3400/6250] eta: 0:07:45 lr: 0.000097 grad: 0.1517 (0.1815) loss: 0.6944 (0.7028) time: 0.1331 data: 0.0418 max mem: 9377 +Train: [34] [3500/6250] eta: 0:07:28 lr: 0.000097 grad: 0.1427 (0.1807) loss: 0.6975 (0.7029) time: 0.1529 data: 0.0661 max mem: 9377 +Train: [34] [3600/6250] eta: 0:07:12 lr: 0.000097 grad: 0.1536 (0.1799) loss: 0.6984 (0.7029) time: 0.1428 data: 0.0349 max mem: 9377 +Train: [34] [3700/6250] eta: 0:06:55 lr: 0.000097 grad: 0.1499 (0.1792) loss: 0.7078 (0.7028) time: 0.1516 data: 0.0671 max mem: 9377 +Train: [34] [3800/6250] eta: 0:06:38 lr: 0.000097 grad: 0.1541 (0.1786) loss: 0.6915 (0.7028) time: 0.1514 data: 0.0671 max mem: 9377 +Train: [34] [3900/6250] eta: 0:06:21 lr: 0.000097 grad: 0.1531 (0.1780) loss: 0.7090 (0.7029) time: 0.1382 data: 0.0449 max mem: 9377 +Train: [34] [4000/6250] eta: 0:06:04 lr: 0.000097 grad: 0.1557 (0.1775) loss: 0.6950 (0.7028) time: 0.1458 data: 0.0632 max mem: 9377 +Train: [34] [4100/6250] eta: 0:05:47 lr: 0.000097 grad: 0.1547 (0.1770) loss: 0.7105 (0.7028) time: 0.1562 data: 0.0670 max mem: 9377 +Train: [34] [4200/6250] eta: 0:05:31 lr: 0.000097 grad: 0.1479 (0.1764) loss: 0.7070 (0.7029) time: 0.1861 data: 0.1082 max mem: 9377 +Train: [34] [4300/6250] eta: 0:05:14 lr: 0.000097 grad: 0.1510 (0.1759) loss: 0.6997 (0.7030) time: 0.1939 data: 0.1092 max mem: 9377 +Train: [34] [4400/6250] eta: 0:04:57 lr: 0.000097 grad: 0.1543 (0.1754) loss: 0.6966 (0.7030) time: 0.1418 data: 0.0393 max mem: 9377 +Train: [34] [4500/6250] eta: 0:04:41 lr: 0.000097 grad: 0.1503 (0.1749) loss: 0.7072 (0.7031) time: 0.1608 data: 0.0720 max mem: 9377 +Train: [34] [4600/6250] eta: 0:04:25 lr: 0.000097 grad: 0.1505 (0.1744) loss: 0.6942 (0.7032) time: 0.1681 data: 0.0804 max mem: 9377 +Train: [34] [4700/6250] eta: 0:04:09 lr: 0.000097 grad: 0.1564 (0.1740) loss: 0.7090 (0.7033) time: 0.1689 data: 0.0829 max mem: 9377 +Train: [34] [4800/6250] eta: 0:03:52 lr: 0.000097 grad: 0.1540 (0.1736) loss: 0.6934 (0.7033) time: 0.1625 data: 0.0722 max mem: 9377 +Train: [34] [4900/6250] eta: 0:03:36 lr: 0.000097 grad: 0.1460 (0.1732) loss: 0.7182 (0.7033) time: 0.1678 data: 0.0824 max mem: 9377 +Train: [34] [5000/6250] eta: 0:03:20 lr: 0.000097 grad: 0.1500 (0.1728) loss: 0.7105 (0.7035) time: 0.1487 data: 0.0643 max mem: 9377 +Train: [34] [5100/6250] eta: 0:03:04 lr: 0.000097 grad: 0.1519 (0.1725) loss: 0.6960 (0.7036) time: 0.1052 data: 0.0007 max mem: 9377 +Train: [34] [5200/6250] eta: 0:02:48 lr: 0.000097 grad: 0.1467 (0.1721) loss: 0.7180 (0.7036) time: 0.1543 data: 0.0641 max mem: 9377 +Train: [34] [5300/6250] eta: 0:02:31 lr: 0.000097 grad: 0.1533 (0.1718) loss: 0.6929 (0.7036) time: 0.1543 data: 0.0704 max mem: 9377 +Train: [34] [5400/6250] eta: 0:02:16 lr: 0.000097 grad: 0.1584 (0.1715) loss: 0.6986 (0.7035) time: 0.1481 data: 0.0580 max mem: 9377 +Train: [34] [5500/6250] eta: 0:02:00 lr: 0.000097 grad: 0.1561 (0.1712) loss: 0.7033 (0.7034) time: 0.1583 data: 0.0690 max mem: 9377 +Train: [34] [5600/6250] eta: 0:01:44 lr: 0.000097 grad: 0.1568 (0.1710) loss: 0.7005 (0.7034) time: 0.1850 data: 0.0932 max mem: 9377 +Train: [34] [5700/6250] eta: 0:01:28 lr: 0.000097 grad: 0.1578 (0.1707) loss: 0.6939 (0.7034) time: 0.1651 data: 0.0776 max mem: 9377 +Train: [34] [5800/6250] eta: 0:01:12 lr: 0.000097 grad: 0.1577 (0.1705) loss: 0.6987 (0.7033) time: 0.1587 data: 0.0554 max mem: 9377 +Train: [34] [5900/6250] eta: 0:00:56 lr: 0.000097 grad: 0.1540 (0.1701) loss: 0.6985 (0.7033) time: 0.1539 data: 0.0675 max mem: 9377 +Train: [34] [6000/6250] eta: 0:00:40 lr: 0.000097 grad: 0.1447 (0.1698) loss: 0.6990 (0.7033) time: 0.1568 data: 0.0735 max mem: 9377 +Train: [34] [6100/6250] eta: 0:00:24 lr: 0.000097 grad: 0.1532 (0.1696) loss: 0.6975 (0.7033) time: 0.1342 data: 0.0419 max mem: 9377 +Train: [34] [6200/6250] eta: 0:00:08 lr: 0.000097 grad: 0.1512 (0.1694) loss: 0.7091 (0.7032) time: 0.1834 data: 0.0980 max mem: 9377 +Train: [34] [6249/6250] eta: 0:00:00 lr: 0.000097 grad: 0.1520 (0.1693) loss: 0.7002 (0.7032) time: 0.1555 data: 0.0691 max mem: 9377 +Train: [34] Total time: 0:16:47 (0.1613 s / it) +Averaged stats: lr: 0.000097 grad: 0.1520 (0.1693) loss: 0.7002 (0.7032) +Eval (hcp-train-subset): [34] [ 0/62] eta: 0:05:18 loss: 0.8753 (0.8753) time: 5.1447 data: 5.1127 max mem: 9377 +Eval (hcp-train-subset): [34] [61/62] eta: 0:00:00 loss: 0.8850 (0.8879) time: 0.1523 data: 0.1254 max mem: 9377 +Eval (hcp-train-subset): [34] Total time: 0:00:14 (0.2396 s / it) +Averaged stats (hcp-train-subset): loss: 0.8850 (0.8879) +Making plots (hcp-train-subset): example=59 +Eval (hcp-val): [34] [ 0/62] eta: 0:05:20 loss: 0.8916 (0.8916) time: 5.1761 data: 5.1449 max mem: 9377 +Eval (hcp-val): [34] [61/62] eta: 0:00:00 loss: 0.8843 (0.8874) time: 0.1331 data: 0.1058 max mem: 9377 +Eval (hcp-val): [34] Total time: 0:00:15 (0.2508 s / it) +Averaged stats (hcp-val): loss: 0.8843 (0.8874) +Making plots (hcp-val): example=45 +Eval (nsd-val): [34] [ 0/62] eta: 0:04:28 loss: 0.8584 (0.8584) time: 4.3265 data: 4.2691 max mem: 9377 +Eval (nsd-val): [34] [61/62] eta: 0:00:00 loss: 0.8652 (0.8672) time: 0.1493 data: 0.1239 max mem: 9377 +Eval (nsd-val): [34] Total time: 0:00:14 (0.2416 s / it) +Averaged stats (nsd-val): loss: 0.8652 (0.8672) +Making plots (nsd-val): example=30 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-00034.pth +Train: [35] [ 0/6250] eta: 10:21:30 lr: 0.000097 grad: 0.3650 (0.3650) loss: 0.5962 (0.5962) time: 5.9664 data: 5.6849 max mem: 9377 +Train: [35] [ 100/6250] eta: 0:23:19 lr: 0.000097 grad: 0.3357 (0.3183) loss: 0.7264 (0.7416) time: 0.1554 data: 0.0536 max mem: 9377 +Train: [35] [ 200/6250] eta: 0:20:32 lr: 0.000097 grad: 0.2808 (0.3418) loss: 0.7053 (0.7285) time: 0.1709 data: 0.0679 max mem: 9377 +Train: [35] [ 300/6250] eta: 0:20:10 lr: 0.000097 grad: 0.2518 (0.3178) loss: 0.7062 (0.7209) time: 0.1887 data: 0.0976 max mem: 9377 +Train: [35] [ 400/6250] eta: 0:19:14 lr: 0.000097 grad: 0.2061 (0.2987) loss: 0.6922 (0.7165) time: 0.1589 data: 0.0741 max mem: 9377 +Train: [35] [ 500/6250] eta: 0:18:27 lr: 0.000097 grad: 0.1931 (0.2837) loss: 0.6994 (0.7118) time: 0.1654 data: 0.0777 max mem: 9377 +Train: [35] [ 600/6250] eta: 0:17:39 lr: 0.000097 grad: 0.1721 (0.2662) loss: 0.6874 (0.7097) time: 0.1621 data: 0.0740 max mem: 9377 +Train: [35] [ 700/6250] eta: 0:17:15 lr: 0.000096 grad: 0.1658 (0.2538) loss: 0.6939 (0.7077) time: 0.1964 data: 0.1043 max mem: 9377 +Train: [35] [ 800/6250] eta: 0:16:57 lr: 0.000096 grad: 0.1939 (0.2467) loss: 0.6998 (0.7062) time: 0.1908 data: 0.0940 max mem: 9377 +Train: [35] [ 900/6250] eta: 0:16:28 lr: 0.000096 grad: 0.1806 (0.2426) loss: 0.7111 (0.7053) time: 0.1809 data: 0.0842 max mem: 9377 +Train: [35] [1000/6250] eta: 0:16:05 lr: 0.000096 grad: 0.2050 (0.2375) loss: 0.7072 (0.7049) time: 0.1928 data: 0.0972 max mem: 9377 +Train: [35] [1100/6250] eta: 0:15:36 lr: 0.000096 grad: 0.1787 (0.2341) loss: 0.7044 (0.7049) time: 0.1618 data: 0.0621 max mem: 9377 +Train: [35] [1200/6250] eta: 0:15:13 lr: 0.000096 grad: 0.1748 (0.2292) loss: 0.7089 (0.7054) time: 0.1747 data: 0.0754 max mem: 9377 +Train: [35] [1300/6250] eta: 0:14:49 lr: 0.000096 grad: 0.1735 (0.2255) loss: 0.7091 (0.7054) time: 0.2066 data: 0.1279 max mem: 9377 +Train: [35] [1400/6250] eta: 0:14:29 lr: 0.000096 grad: 0.1843 (0.2229) loss: 0.7053 (0.7059) time: 0.1829 data: 0.1042 max mem: 9377 +Train: [35] [1500/6250] eta: 0:14:11 lr: 0.000096 grad: 0.1805 (0.2209) loss: 0.7042 (0.7059) time: 0.2051 data: 0.1168 max mem: 9377 +Train: [35] [1600/6250] eta: 0:13:51 lr: 0.000096 grad: 0.1624 (0.2183) loss: 0.7111 (0.7060) time: 0.1756 data: 0.0863 max mem: 9377 +Train: [35] [1700/6250] eta: 0:13:24 lr: 0.000096 grad: 0.1585 (0.2151) loss: 0.7056 (0.7063) time: 0.1559 data: 0.0730 max mem: 9377 +Train: [35] [1800/6250] eta: 0:13:05 lr: 0.000096 grad: 0.1488 (0.2116) loss: 0.7071 (0.7065) time: 0.1656 data: 0.0752 max mem: 9377 +Train: [35] [1900/6250] eta: 0:12:44 lr: 0.000096 grad: 0.1508 (0.2086) loss: 0.7017 (0.7065) time: 0.1585 data: 0.0715 max mem: 9377 +Train: [35] [2000/6250] eta: 0:12:22 lr: 0.000096 grad: 0.1543 (0.2058) loss: 0.6951 (0.7067) time: 0.1735 data: 0.0838 max mem: 9377 +Train: [35] [2100/6250] eta: 0:11:58 lr: 0.000096 grad: 0.1492 (0.2033) loss: 0.7073 (0.7067) time: 0.1377 data: 0.0430 max mem: 9377 +Train: [35] [2200/6250] eta: 0:11:36 lr: 0.000096 grad: 0.1542 (0.2012) loss: 0.7045 (0.7068) time: 0.1408 data: 0.0510 max mem: 9377 +Train: [35] [2300/6250] eta: 0:11:16 lr: 0.000096 grad: 0.1486 (0.1991) loss: 0.7155 (0.7068) time: 0.1211 data: 0.0303 max mem: 9377 +Train: [35] [2400/6250] eta: 0:10:57 lr: 0.000096 grad: 0.1363 (0.1971) loss: 0.7175 (0.7067) time: 0.1716 data: 0.0855 max mem: 9377 +Train: [35] [2500/6250] eta: 0:10:40 lr: 0.000096 grad: 0.1498 (0.1953) loss: 0.6996 (0.7068) time: 0.1642 data: 0.0744 max mem: 9377 +Train: [35] [2600/6250] eta: 0:10:24 lr: 0.000096 grad: 0.1441 (0.1936) loss: 0.6995 (0.7069) time: 0.1792 data: 0.0810 max mem: 9377 +Train: [35] [2700/6250] eta: 0:10:07 lr: 0.000096 grad: 0.1457 (0.1920) loss: 0.7020 (0.7069) time: 0.1418 data: 0.0606 max mem: 9377 +Train: [35] [2800/6250] eta: 0:09:51 lr: 0.000096 grad: 0.1478 (0.1906) loss: 0.7106 (0.7069) time: 0.1881 data: 0.0899 max mem: 9377 +Train: [35] [2900/6250] eta: 0:09:33 lr: 0.000096 grad: 0.1456 (0.1892) loss: 0.7153 (0.7069) time: 0.1560 data: 0.0676 max mem: 9377 +Train: [35] [3000/6250] eta: 0:09:15 lr: 0.000096 grad: 0.1505 (0.1880) loss: 0.7105 (0.7069) time: 0.1746 data: 0.0947 max mem: 9377 +Train: [35] [3100/6250] eta: 0:08:56 lr: 0.000096 grad: 0.1525 (0.1869) loss: 0.7149 (0.7068) time: 0.1380 data: 0.0481 max mem: 9377 +Train: [35] [3200/6250] eta: 0:08:38 lr: 0.000096 grad: 0.1482 (0.1860) loss: 0.7098 (0.7067) time: 0.1824 data: 0.0988 max mem: 9377 +Train: [35] [3300/6250] eta: 0:08:21 lr: 0.000096 grad: 0.1518 (0.1850) loss: 0.7011 (0.7065) time: 0.1420 data: 0.0535 max mem: 9377 +Train: [35] [3400/6250] eta: 0:08:05 lr: 0.000096 grad: 0.1527 (0.1842) loss: 0.6954 (0.7063) time: 0.1752 data: 0.0897 max mem: 9377 +Train: [35] [3500/6250] eta: 0:07:48 lr: 0.000096 grad: 0.1571 (0.1835) loss: 0.7063 (0.7061) time: 0.2010 data: 0.1209 max mem: 9377 +Train: [35] [3600/6250] eta: 0:07:30 lr: 0.000096 grad: 0.1562 (0.1828) loss: 0.6932 (0.7057) time: 0.1543 data: 0.0696 max mem: 9377 +Train: [35] [3700/6250] eta: 0:07:13 lr: 0.000096 grad: 0.1545 (0.1822) loss: 0.7007 (0.7054) time: 0.1650 data: 0.0796 max mem: 9377 +Train: [35] [3800/6250] eta: 0:06:56 lr: 0.000096 grad: 0.1573 (0.1816) loss: 0.6906 (0.7050) time: 0.1726 data: 0.0869 max mem: 9377 +Train: [35] [3900/6250] eta: 0:06:39 lr: 0.000096 grad: 0.1531 (0.1810) loss: 0.6906 (0.7048) time: 0.1211 data: 0.0273 max mem: 9377 +Train: [35] [4000/6250] eta: 0:06:22 lr: 0.000096 grad: 0.1621 (0.1803) loss: 0.6937 (0.7047) time: 0.1754 data: 0.0973 max mem: 9377 +Train: [35] [4100/6250] eta: 0:06:05 lr: 0.000096 grad: 0.1543 (0.1798) loss: 0.6892 (0.7043) time: 0.1694 data: 0.0804 max mem: 9377 +Train: [35] [4200/6250] eta: 0:05:47 lr: 0.000096 grad: 0.1536 (0.1792) loss: 0.6953 (0.7041) time: 0.1605 data: 0.0622 max mem: 9377 +Train: [35] [4300/6250] eta: 0:05:29 lr: 0.000095 grad: 0.1485 (0.1786) loss: 0.6939 (0.7040) time: 0.1586 data: 0.0724 max mem: 9377 +Train: [35] [4400/6250] eta: 0:05:12 lr: 0.000095 grad: 0.1506 (0.1781) loss: 0.7040 (0.7038) time: 0.1609 data: 0.0765 max mem: 9377 +Train: [35] [4500/6250] eta: 0:04:54 lr: 0.000095 grad: 0.1505 (0.1775) loss: 0.6945 (0.7035) time: 0.1362 data: 0.0532 max mem: 9377 +Train: [35] [4600/6250] eta: 0:04:37 lr: 0.000095 grad: 0.1459 (0.1771) loss: 0.7083 (0.7034) time: 0.1698 data: 0.0821 max mem: 9377 +Train: [35] [4700/6250] eta: 0:04:20 lr: 0.000095 grad: 0.1554 (0.1766) loss: 0.7110 (0.7035) time: 0.1610 data: 0.0537 max mem: 9377 +Train: [35] [4800/6250] eta: 0:04:03 lr: 0.000095 grad: 0.1650 (0.1763) loss: 0.6889 (0.7034) time: 0.1633 data: 0.0743 max mem: 9377 +Train: [35] [4900/6250] eta: 0:03:46 lr: 0.000095 grad: 0.1573 (0.1759) loss: 0.7060 (0.7034) time: 0.1553 data: 0.0617 max mem: 9377 +Train: [35] [5000/6250] eta: 0:03:29 lr: 0.000095 grad: 0.1509 (0.1755) loss: 0.6932 (0.7034) time: 0.1697 data: 0.0832 max mem: 9377 +Train: [35] [5100/6250] eta: 0:03:12 lr: 0.000095 grad: 0.1532 (0.1751) loss: 0.7021 (0.7034) time: 0.1729 data: 0.0909 max mem: 9377 +Train: [35] [5200/6250] eta: 0:02:55 lr: 0.000095 grad: 0.1581 (0.1747) loss: 0.6968 (0.7035) time: 0.1446 data: 0.0526 max mem: 9377 +Train: [35] [5300/6250] eta: 0:02:39 lr: 0.000095 grad: 0.1528 (0.1743) loss: 0.7070 (0.7035) time: 0.2064 data: 0.1123 max mem: 9377 +Train: [35] [5400/6250] eta: 0:02:22 lr: 0.000095 grad: 0.1497 (0.1739) loss: 0.7066 (0.7034) time: 0.1748 data: 0.0828 max mem: 9377 +Train: [35] [5500/6250] eta: 0:02:05 lr: 0.000095 grad: 0.1511 (0.1735) loss: 0.7029 (0.7034) time: 0.1622 data: 0.0698 max mem: 9377 +Train: [35] [5600/6250] eta: 0:01:48 lr: 0.000095 grad: 0.1528 (0.1732) loss: 0.6974 (0.7035) time: 0.1493 data: 0.0601 max mem: 9377 +Train: [35] [5700/6250] eta: 0:01:31 lr: 0.000095 grad: 0.1540 (0.1729) loss: 0.7085 (0.7035) time: 0.1381 data: 0.0522 max mem: 9377 +Train: [35] [5800/6250] eta: 0:01:14 lr: 0.000095 grad: 0.1502 (0.1726) loss: 0.7096 (0.7036) time: 0.1630 data: 0.0662 max mem: 9377 +Train: [35] [5900/6250] eta: 0:00:58 lr: 0.000095 grad: 0.1536 (0.1722) loss: 0.7136 (0.7037) time: 0.1502 data: 0.0555 max mem: 9377 +Train: [35] [6000/6250] eta: 0:00:41 lr: 0.000095 grad: 0.1562 (0.1720) loss: 0.7096 (0.7038) time: 0.1678 data: 0.0769 max mem: 9377 +Train: [35] [6100/6250] eta: 0:00:24 lr: 0.000095 grad: 0.1566 (0.1717) loss: 0.7073 (0.7039) time: 0.1723 data: 0.0825 max mem: 9377 +Train: [35] [6200/6250] eta: 0:00:08 lr: 0.000095 grad: 0.1484 (0.1714) loss: 0.7041 (0.7039) time: 0.1469 data: 0.0557 max mem: 9377 +Train: [35] [6249/6250] eta: 0:00:00 lr: 0.000095 grad: 0.1553 (0.1713) loss: 0.6992 (0.7039) time: 0.1370 data: 0.0509 max mem: 9377 +Train: [35] Total time: 0:17:21 (0.1666 s / it) +Averaged stats: lr: 0.000095 grad: 0.1553 (0.1713) loss: 0.6992 (0.7039) +Eval (hcp-train-subset): [35] [ 0/62] eta: 0:06:16 loss: 0.8820 (0.8820) time: 6.0770 data: 6.0461 max mem: 9377 +Eval (hcp-train-subset): [35] [61/62] eta: 0:00:00 loss: 0.8874 (0.8888) time: 0.1277 data: 0.1025 max mem: 9377 +Eval (hcp-train-subset): [35] Total time: 0:00:15 (0.2431 s / it) +Averaged stats (hcp-train-subset): loss: 0.8874 (0.8888) +Eval (hcp-val): [35] [ 0/62] eta: 0:03:39 loss: 0.8940 (0.8940) time: 3.5327 data: 3.4606 max mem: 9377 +Eval (hcp-val): [35] [61/62] eta: 0:00:00 loss: 0.8873 (0.8876) time: 0.1271 data: 0.1000 max mem: 9377 +Eval (hcp-val): [35] Total time: 0:00:14 (0.2414 s / it) +Averaged stats (hcp-val): loss: 0.8873 (0.8876) +Eval (nsd-val): [35] [ 0/62] eta: 0:05:03 loss: 0.8558 (0.8558) time: 4.8923 data: 4.8594 max mem: 9377 +Eval (nsd-val): [35] [61/62] eta: 0:00:00 loss: 0.8648 (0.8657) time: 0.1538 data: 0.1268 max mem: 9377 +Eval (nsd-val): [35] Total time: 0:00:14 (0.2380 s / it) +Averaged stats (nsd-val): loss: 0.8648 (0.8657) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [36] [ 0/6250] eta: 8:36:23 lr: 0.000095 grad: 0.2308 (0.2308) loss: 0.7188 (0.7188) time: 4.9573 data: 4.6861 max mem: 9377 +Train: [36] [ 100/6250] eta: 0:22:57 lr: 0.000095 grad: 0.3253 (0.3380) loss: 0.7200 (0.7404) time: 0.1794 data: 0.0830 max mem: 9377 +Train: [36] [ 200/6250] eta: 0:19:59 lr: 0.000095 grad: 0.3432 (0.3615) loss: 0.6980 (0.7234) time: 0.1549 data: 0.0630 max mem: 9377 +Train: [36] [ 300/6250] eta: 0:18:39 lr: 0.000095 grad: 0.2249 (0.3255) loss: 0.7004 (0.7182) time: 0.1525 data: 0.0575 max mem: 9377 +Train: [36] [ 400/6250] eta: 0:18:12 lr: 0.000095 grad: 0.2682 (0.3076) loss: 0.7152 (0.7159) time: 0.1641 data: 0.0732 max mem: 9377 +Train: [36] [ 500/6250] eta: 0:17:40 lr: 0.000095 grad: 0.1998 (0.2905) loss: 0.7158 (0.7147) time: 0.1894 data: 0.0996 max mem: 9377 +Train: [36] [ 600/6250] eta: 0:17:04 lr: 0.000095 grad: 0.1906 (0.2767) loss: 0.7144 (0.7133) time: 0.1413 data: 0.0473 max mem: 9377 +Train: [36] [ 700/6250] eta: 0:16:32 lr: 0.000095 grad: 0.1564 (0.2606) loss: 0.7091 (0.7126) time: 0.1584 data: 0.0552 max mem: 9377 +Train: [36] [ 800/6250] eta: 0:16:08 lr: 0.000095 grad: 0.1528 (0.2478) loss: 0.6969 (0.7121) time: 0.1712 data: 0.0726 max mem: 9377 +Train: [36] [ 900/6250] eta: 0:15:43 lr: 0.000095 grad: 0.1607 (0.2374) loss: 0.7121 (0.7120) time: 0.1610 data: 0.0713 max mem: 9377 +Train: [36] [1000/6250] eta: 0:15:15 lr: 0.000095 grad: 0.1505 (0.2290) loss: 0.7140 (0.7118) time: 0.1502 data: 0.0606 max mem: 9377 +Train: [36] [1100/6250] eta: 0:14:47 lr: 0.000095 grad: 0.1586 (0.2222) loss: 0.6923 (0.7112) time: 0.1556 data: 0.0699 max mem: 9377 +Train: [36] [1200/6250] eta: 0:14:23 lr: 0.000095 grad: 0.1510 (0.2164) loss: 0.7061 (0.7110) time: 0.1345 data: 0.0524 max mem: 9377 +Train: [36] [1300/6250] eta: 0:14:03 lr: 0.000095 grad: 0.1556 (0.2118) loss: 0.6962 (0.7108) time: 0.1275 data: 0.0506 max mem: 9377 +Train: [36] [1400/6250] eta: 0:13:44 lr: 0.000095 grad: 0.1556 (0.2079) loss: 0.7033 (0.7103) time: 0.1765 data: 0.0873 max mem: 9377 +Train: [36] [1500/6250] eta: 0:13:26 lr: 0.000095 grad: 0.1529 (0.2041) loss: 0.7082 (0.7101) time: 0.1553 data: 0.0696 max mem: 9377 +Train: [36] [1600/6250] eta: 0:13:06 lr: 0.000094 grad: 0.1524 (0.2009) loss: 0.7045 (0.7100) time: 0.1666 data: 0.0870 max mem: 9377 +Train: [36] [1700/6250] eta: 0:12:47 lr: 0.000094 grad: 0.1536 (0.1982) loss: 0.7052 (0.7096) time: 0.1578 data: 0.0643 max mem: 9377 +Train: [36] [1800/6250] eta: 0:12:29 lr: 0.000094 grad: 0.1544 (0.1958) loss: 0.6931 (0.7092) time: 0.1640 data: 0.0657 max mem: 9377 +Train: [36] [1900/6250] eta: 0:12:13 lr: 0.000094 grad: 0.1587 (0.1939) loss: 0.7051 (0.7086) time: 0.1827 data: 0.0896 max mem: 9377 +Train: [36] [2000/6250] eta: 0:11:58 lr: 0.000094 grad: 0.1546 (0.1920) loss: 0.6778 (0.7078) time: 0.1670 data: 0.0822 max mem: 9377 +Train: [36] [2100/6250] eta: 0:11:40 lr: 0.000094 grad: 0.1608 (0.1903) loss: 0.6861 (0.7072) time: 0.1629 data: 0.0737 max mem: 9377 +Train: [36] [2200/6250] eta: 0:11:21 lr: 0.000094 grad: 0.1469 (0.1891) loss: 0.7126 (0.7066) time: 0.1677 data: 0.0878 max mem: 9377 +Train: [36] [2300/6250] eta: 0:11:01 lr: 0.000094 grad: 0.1615 (0.1879) loss: 0.7065 (0.7060) time: 0.1349 data: 0.0396 max mem: 9377 +Train: [36] [2400/6250] eta: 0:10:44 lr: 0.000094 grad: 0.1642 (0.1869) loss: 0.7111 (0.7054) time: 0.1621 data: 0.0777 max mem: 9377 +Train: [36] [2500/6250] eta: 0:10:26 lr: 0.000094 grad: 0.1561 (0.1859) loss: 0.6911 (0.7051) time: 0.1678 data: 0.0963 max mem: 9377 +Train: [36] [2600/6250] eta: 0:10:09 lr: 0.000094 grad: 0.1607 (0.1849) loss: 0.7035 (0.7047) time: 0.1845 data: 0.0941 max mem: 9377 +Train: [36] [2700/6250] eta: 0:09:53 lr: 0.000094 grad: 0.1620 (0.1840) loss: 0.6844 (0.7045) time: 0.1509 data: 0.0591 max mem: 9377 +Train: [36] [2800/6250] eta: 0:09:35 lr: 0.000094 grad: 0.1588 (0.1831) loss: 0.6965 (0.7044) time: 0.1706 data: 0.0866 max mem: 9377 +Train: [36] [2900/6250] eta: 0:09:17 lr: 0.000094 grad: 0.1609 (0.1823) loss: 0.6918 (0.7041) time: 0.1518 data: 0.0716 max mem: 9377 +Train: [36] [3000/6250] eta: 0:08:59 lr: 0.000094 grad: 0.1482 (0.1816) loss: 0.6978 (0.7041) time: 0.1479 data: 0.0610 max mem: 9377 +Train: [36] [3100/6250] eta: 0:08:42 lr: 0.000094 grad: 0.1501 (0.1808) loss: 0.6936 (0.7040) time: 0.1666 data: 0.0793 max mem: 9377 +Train: [36] [3200/6250] eta: 0:08:23 lr: 0.000094 grad: 0.1575 (0.1799) loss: 0.6934 (0.7039) time: 0.1564 data: 0.0583 max mem: 9377 +Train: [36] [3300/6250] eta: 0:08:05 lr: 0.000094 grad: 0.1513 (0.1791) loss: 0.7041 (0.7039) time: 0.1428 data: 0.0576 max mem: 9377 +Train: [36] [3400/6250] eta: 0:07:47 lr: 0.000094 grad: 0.1502 (0.1784) loss: 0.7162 (0.7041) time: 0.1592 data: 0.0701 max mem: 9377 +Train: [36] [3500/6250] eta: 0:07:30 lr: 0.000094 grad: 0.1497 (0.1777) loss: 0.7155 (0.7041) time: 0.1476 data: 0.0591 max mem: 9377 +Train: [36] [3600/6250] eta: 0:07:13 lr: 0.000094 grad: 0.1520 (0.1769) loss: 0.7081 (0.7042) time: 0.1569 data: 0.0769 max mem: 9377 +Train: [36] [3700/6250] eta: 0:06:56 lr: 0.000094 grad: 0.1555 (0.1763) loss: 0.6898 (0.7042) time: 0.1710 data: 0.0917 max mem: 9377 +Train: [36] [3800/6250] eta: 0:06:40 lr: 0.000094 grad: 0.1558 (0.1757) loss: 0.7084 (0.7044) time: 0.1442 data: 0.0639 max mem: 9377 +Train: [36] [3900/6250] eta: 0:06:23 lr: 0.000094 grad: 0.1513 (0.1753) loss: 0.7034 (0.7046) time: 0.1122 data: 0.0296 max mem: 9377 +Train: [36] [4000/6250] eta: 0:06:08 lr: 0.000094 grad: 0.1548 (0.1747) loss: 0.7069 (0.7047) time: 0.2393 data: 0.1614 max mem: 9377 +Train: [36] [4100/6250] eta: 0:05:50 lr: 0.000094 grad: 0.1563 (0.1743) loss: 0.7001 (0.7047) time: 0.1575 data: 0.0723 max mem: 9377 +Train: [36] [4200/6250] eta: 0:05:34 lr: 0.000094 grad: 0.1525 (0.1739) loss: 0.7057 (0.7047) time: 0.1302 data: 0.0452 max mem: 9377 +Train: [36] [4300/6250] eta: 0:05:17 lr: 0.000094 grad: 0.1543 (0.1735) loss: 0.7022 (0.7047) time: 0.1142 data: 0.0195 max mem: 9377 +Train: [36] [4400/6250] eta: 0:05:00 lr: 0.000094 grad: 0.1552 (0.1732) loss: 0.7070 (0.7047) time: 0.1422 data: 0.0594 max mem: 9377 +Train: [36] [4500/6250] eta: 0:04:44 lr: 0.000094 grad: 0.1690 (0.1729) loss: 0.6981 (0.7047) time: 0.1660 data: 0.0848 max mem: 9377 +Train: [36] [4600/6250] eta: 0:04:28 lr: 0.000094 grad: 0.1628 (0.1726) loss: 0.7073 (0.7047) time: 0.1458 data: 0.0617 max mem: 9377 +Train: [36] [4700/6250] eta: 0:04:11 lr: 0.000094 grad: 0.1595 (0.1723) loss: 0.6931 (0.7046) time: 0.1628 data: 0.0773 max mem: 9377 +Train: [36] [4800/6250] eta: 0:03:55 lr: 0.000094 grad: 0.1561 (0.1720) loss: 0.7074 (0.7045) time: 0.1671 data: 0.0788 max mem: 9377 +Train: [36] [4900/6250] eta: 0:03:38 lr: 0.000094 grad: 0.1590 (0.1717) loss: 0.6849 (0.7044) time: 0.1460 data: 0.0625 max mem: 9377 +Train: [36] [5000/6250] eta: 0:03:22 lr: 0.000094 grad: 0.1563 (0.1714) loss: 0.7019 (0.7044) time: 0.1712 data: 0.0760 max mem: 9377 +Train: [36] [5100/6250] eta: 0:03:06 lr: 0.000093 grad: 0.1543 (0.1712) loss: 0.6972 (0.7043) time: 0.1564 data: 0.0711 max mem: 9377 +Train: [36] [5200/6250] eta: 0:02:49 lr: 0.000093 grad: 0.1607 (0.1710) loss: 0.7024 (0.7041) time: 0.1514 data: 0.0618 max mem: 9377 +Train: [36] [5300/6250] eta: 0:02:33 lr: 0.000093 grad: 0.1551 (0.1708) loss: 0.6954 (0.7040) time: 0.1500 data: 0.0607 max mem: 9377 +Train: [36] [5400/6250] eta: 0:02:17 lr: 0.000093 grad: 0.1570 (0.1706) loss: 0.6990 (0.7040) time: 0.1735 data: 0.0869 max mem: 9377 +Train: [36] [5500/6250] eta: 0:02:00 lr: 0.000093 grad: 0.1613 (0.1704) loss: 0.6973 (0.7038) time: 0.1439 data: 0.0562 max mem: 9377 +Train: [36] [5600/6250] eta: 0:01:44 lr: 0.000093 grad: 0.1507 (0.1701) loss: 0.7055 (0.7037) time: 0.1622 data: 0.0776 max mem: 9377 +Train: [36] [5700/6250] eta: 0:01:28 lr: 0.000093 grad: 0.1592 (0.1700) loss: 0.6882 (0.7036) time: 0.1501 data: 0.0646 max mem: 9377 +Train: [36] [5800/6250] eta: 0:01:12 lr: 0.000093 grad: 0.1520 (0.1698) loss: 0.7016 (0.7035) time: 0.1706 data: 0.0829 max mem: 9377 +Train: [36] [5900/6250] eta: 0:00:56 lr: 0.000093 grad: 0.1595 (0.1696) loss: 0.6913 (0.7034) time: 0.1459 data: 0.0546 max mem: 9377 +Train: [36] [6000/6250] eta: 0:00:40 lr: 0.000093 grad: 0.1550 (0.1695) loss: 0.6995 (0.7033) time: 0.1577 data: 0.0684 max mem: 9377 +Train: [36] [6100/6250] eta: 0:00:24 lr: 0.000093 grad: 0.1591 (0.1694) loss: 0.6919 (0.7032) time: 0.1461 data: 0.0554 max mem: 9377 +Train: [36] [6200/6250] eta: 0:00:08 lr: 0.000093 grad: 0.1592 (0.1692) loss: 0.6903 (0.7031) time: 0.1517 data: 0.0670 max mem: 9377 +Train: [36] [6249/6250] eta: 0:00:00 lr: 0.000093 grad: 0.1565 (0.1691) loss: 0.7063 (0.7031) time: 0.1470 data: 0.0604 max mem: 9377 +Train: [36] Total time: 0:16:48 (0.1614 s / it) +Averaged stats: lr: 0.000093 grad: 0.1565 (0.1691) loss: 0.7063 (0.7031) +Eval (hcp-train-subset): [36] [ 0/62] eta: 0:03:26 loss: 0.8811 (0.8811) time: 3.3373 data: 3.2727 max mem: 9377 +Eval (hcp-train-subset): [36] [61/62] eta: 0:00:00 loss: 0.8835 (0.8868) time: 0.1466 data: 0.1215 max mem: 9377 +Eval (hcp-train-subset): [36] Total time: 0:00:14 (0.2391 s / it) +Averaged stats (hcp-train-subset): loss: 0.8835 (0.8868) +Eval (hcp-val): [36] [ 0/62] eta: 0:05:15 loss: 0.8854 (0.8854) time: 5.0913 data: 5.0606 max mem: 9377 +Eval (hcp-val): [36] [61/62] eta: 0:00:00 loss: 0.8819 (0.8874) time: 0.1491 data: 0.1221 max mem: 9377 +Eval (hcp-val): [36] Total time: 0:00:15 (0.2465 s / it) +Averaged stats (hcp-val): loss: 0.8819 (0.8874) +Eval (nsd-val): [36] [ 0/62] eta: 0:05:03 loss: 0.8551 (0.8551) time: 4.8975 data: 4.8663 max mem: 9377 +Eval (nsd-val): [36] [61/62] eta: 0:00:00 loss: 0.8634 (0.8645) time: 0.1398 data: 0.1147 max mem: 9377 +Eval (nsd-val): [36] Total time: 0:00:14 (0.2365 s / it) +Averaged stats (nsd-val): loss: 0.8634 (0.8645) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [37] [ 0/6250] eta: 11:17:59 lr: 0.000093 grad: 0.2088 (0.2088) loss: 0.8110 (0.8110) time: 6.5087 data: 6.4137 max mem: 9377 +Train: [37] [ 100/6250] eta: 0:23:03 lr: 0.000093 grad: 0.2835 (0.3594) loss: 0.7439 (0.7367) time: 0.1696 data: 0.0754 max mem: 9377 +Train: [37] [ 200/6250] eta: 0:19:52 lr: 0.000093 grad: 0.2816 (0.3378) loss: 0.7258 (0.7293) time: 0.1593 data: 0.0515 max mem: 9377 +Train: [37] [ 300/6250] eta: 0:18:33 lr: 0.000093 grad: 0.1975 (0.3028) loss: 0.7157 (0.7272) time: 0.1547 data: 0.0602 max mem: 9377 +Train: [37] [ 400/6250] eta: 0:17:51 lr: 0.000093 grad: 0.1689 (0.2722) loss: 0.7218 (0.7270) time: 0.1804 data: 0.0953 max mem: 9377 +Train: [37] [ 500/6250] eta: 0:17:11 lr: 0.000093 grad: 0.1521 (0.2510) loss: 0.7159 (0.7240) time: 0.1730 data: 0.0813 max mem: 9377 +Train: [37] [ 600/6250] eta: 0:16:35 lr: 0.000093 grad: 0.1607 (0.2369) loss: 0.6906 (0.7212) time: 0.1539 data: 0.0617 max mem: 9377 +Train: [37] [ 700/6250] eta: 0:16:02 lr: 0.000093 grad: 0.1537 (0.2273) loss: 0.7038 (0.7181) time: 0.1758 data: 0.0983 max mem: 9377 +Train: [37] [ 800/6250] eta: 0:15:37 lr: 0.000093 grad: 0.1605 (0.2190) loss: 0.7148 (0.7164) time: 0.1638 data: 0.0753 max mem: 9377 +Train: [37] [ 900/6250] eta: 0:15:12 lr: 0.000093 grad: 0.1654 (0.2125) loss: 0.6958 (0.7149) time: 0.1688 data: 0.0739 max mem: 9377 +Train: [37] [1000/6250] eta: 0:14:44 lr: 0.000093 grad: 0.1483 (0.2072) loss: 0.7038 (0.7138) time: 0.1587 data: 0.0636 max mem: 9377 +Train: [37] [1100/6250] eta: 0:14:15 lr: 0.000093 grad: 0.1527 (0.2027) loss: 0.7215 (0.7130) time: 0.1446 data: 0.0505 max mem: 9377 +Train: [37] [1200/6250] eta: 0:13:49 lr: 0.000093 grad: 0.1525 (0.1986) loss: 0.6960 (0.7127) time: 0.1320 data: 0.0411 max mem: 9377 +Train: [37] [1300/6250] eta: 0:13:28 lr: 0.000093 grad: 0.1550 (0.1955) loss: 0.7188 (0.7121) time: 0.1708 data: 0.0834 max mem: 9377 +Train: [37] [1400/6250] eta: 0:13:07 lr: 0.000093 grad: 0.1546 (0.1927) loss: 0.7133 (0.7114) time: 0.1388 data: 0.0395 max mem: 9377 +Train: [37] [1500/6250] eta: 0:12:49 lr: 0.000093 grad: 0.1567 (0.1904) loss: 0.6977 (0.7105) time: 0.1691 data: 0.0814 max mem: 9377 +Train: [37] [1600/6250] eta: 0:12:36 lr: 0.000093 grad: 0.1539 (0.1883) loss: 0.6899 (0.7097) time: 0.1521 data: 0.0701 max mem: 9377 +Train: [37] [1700/6250] eta: 0:12:21 lr: 0.000093 grad: 0.1527 (0.1866) loss: 0.7021 (0.7089) time: 0.1613 data: 0.0754 max mem: 9377 +Train: [37] [1800/6250] eta: 0:12:05 lr: 0.000093 grad: 0.1515 (0.1848) loss: 0.7101 (0.7086) time: 0.1545 data: 0.0663 max mem: 9377 +Train: [37] [1900/6250] eta: 0:11:48 lr: 0.000093 grad: 0.1567 (0.1834) loss: 0.7035 (0.7081) time: 0.1724 data: 0.0833 max mem: 9377 +Train: [37] [2000/6250] eta: 0:11:31 lr: 0.000093 grad: 0.1582 (0.1821) loss: 0.6935 (0.7077) time: 0.1539 data: 0.0623 max mem: 9377 +Train: [37] [2100/6250] eta: 0:11:15 lr: 0.000093 grad: 0.1587 (0.1810) loss: 0.6898 (0.7069) time: 0.1469 data: 0.0534 max mem: 9377 +Train: [37] [2200/6250] eta: 0:10:57 lr: 0.000093 grad: 0.1533 (0.1800) loss: 0.6913 (0.7063) time: 0.1445 data: 0.0545 max mem: 9377 +Train: [37] [2300/6250] eta: 0:10:39 lr: 0.000092 grad: 0.1566 (0.1791) loss: 0.6857 (0.7058) time: 0.1515 data: 0.0683 max mem: 9377 +Train: [37] [2400/6250] eta: 0:10:21 lr: 0.000092 grad: 0.1582 (0.1783) loss: 0.6964 (0.7055) time: 0.1600 data: 0.0637 max mem: 9377 +Train: [37] [2500/6250] eta: 0:10:02 lr: 0.000092 grad: 0.1538 (0.1773) loss: 0.7071 (0.7054) time: 0.1525 data: 0.0660 max mem: 9377 +Train: [37] [2600/6250] eta: 0:09:48 lr: 0.000092 grad: 0.1520 (0.1766) loss: 0.7046 (0.7051) time: 0.2168 data: 0.1414 max mem: 9377 +Train: [37] [2700/6250] eta: 0:09:35 lr: 0.000092 grad: 0.1543 (0.1760) loss: 0.6989 (0.7049) time: 0.1732 data: 0.0810 max mem: 9377 +Train: [37] [2800/6250] eta: 0:09:21 lr: 0.000092 grad: 0.1570 (0.1753) loss: 0.7047 (0.7048) time: 0.1920 data: 0.1061 max mem: 9377 +Train: [37] [2900/6250] eta: 0:09:06 lr: 0.000092 grad: 0.1515 (0.1747) loss: 0.6957 (0.7046) time: 0.1584 data: 0.0673 max mem: 9377 +Train: [37] [3000/6250] eta: 0:08:52 lr: 0.000092 grad: 0.1546 (0.1742) loss: 0.7086 (0.7043) time: 0.1699 data: 0.0744 max mem: 9377 +Train: [37] [3100/6250] eta: 0:08:35 lr: 0.000092 grad: 0.1591 (0.1737) loss: 0.6926 (0.7041) time: 0.1535 data: 0.0565 max mem: 9377 +Train: [37] [3200/6250] eta: 0:08:19 lr: 0.000092 grad: 0.1524 (0.1732) loss: 0.6953 (0.7039) time: 0.1549 data: 0.0699 max mem: 9377 +Train: [37] [3300/6250] eta: 0:08:02 lr: 0.000092 grad: 0.1584 (0.1726) loss: 0.6938 (0.7038) time: 0.1627 data: 0.0784 max mem: 9377 +Train: [37] [3400/6250] eta: 0:07:45 lr: 0.000092 grad: 0.1558 (0.1722) loss: 0.6956 (0.7035) time: 0.1676 data: 0.0815 max mem: 9377 +Train: [37] [3500/6250] eta: 0:07:29 lr: 0.000092 grad: 0.1570 (0.1718) loss: 0.7103 (0.7034) time: 0.1802 data: 0.0947 max mem: 9377 +Train: [37] [3600/6250] eta: 0:07:12 lr: 0.000092 grad: 0.1562 (0.1714) loss: 0.7028 (0.7033) time: 0.1577 data: 0.0708 max mem: 9377 +Train: [37] [3700/6250] eta: 0:06:55 lr: 0.000092 grad: 0.1569 (0.1710) loss: 0.6898 (0.7030) time: 0.1456 data: 0.0550 max mem: 9377 +Train: [37] [3800/6250] eta: 0:06:38 lr: 0.000092 grad: 0.1526 (0.1707) loss: 0.7011 (0.7028) time: 0.1646 data: 0.0779 max mem: 9377 +Train: [37] [3900/6250] eta: 0:06:22 lr: 0.000092 grad: 0.1540 (0.1704) loss: 0.6878 (0.7027) time: 0.1510 data: 0.0542 max mem: 9377 +Train: [37] [4000/6250] eta: 0:06:05 lr: 0.000092 grad: 0.1578 (0.1700) loss: 0.7083 (0.7026) time: 0.1580 data: 0.0653 max mem: 9377 +Train: [37] [4100/6250] eta: 0:05:50 lr: 0.000092 grad: 0.1594 (0.1698) loss: 0.7018 (0.7026) time: 0.1947 data: 0.1092 max mem: 9377 +Train: [37] [4200/6250] eta: 0:05:33 lr: 0.000092 grad: 0.1572 (0.1696) loss: 0.7055 (0.7024) time: 0.1710 data: 0.0835 max mem: 9377 +Train: [37] [4300/6250] eta: 0:05:17 lr: 0.000092 grad: 0.1564 (0.1693) loss: 0.6944 (0.7023) time: 0.1695 data: 0.0798 max mem: 9377 +Train: [37] [4400/6250] eta: 0:05:01 lr: 0.000092 grad: 0.1572 (0.1691) loss: 0.6965 (0.7022) time: 0.1456 data: 0.0558 max mem: 9377 +Train: [37] [4500/6250] eta: 0:04:44 lr: 0.000092 grad: 0.1602 (0.1689) loss: 0.6875 (0.7019) time: 0.1374 data: 0.0540 max mem: 9377 +Train: [37] [4600/6250] eta: 0:04:28 lr: 0.000092 grad: 0.1626 (0.1687) loss: 0.6967 (0.7016) time: 0.1598 data: 0.0706 max mem: 9377 +Train: [37] [4700/6250] eta: 0:04:11 lr: 0.000092 grad: 0.1627 (0.1686) loss: 0.6886 (0.7015) time: 0.1176 data: 0.0239 max mem: 9377 +Train: [37] [4800/6250] eta: 0:03:55 lr: 0.000092 grad: 0.1563 (0.1683) loss: 0.6861 (0.7014) time: 0.1519 data: 0.0618 max mem: 9377 +Train: [37] [4900/6250] eta: 0:03:38 lr: 0.000092 grad: 0.1568 (0.1681) loss: 0.6914 (0.7013) time: 0.1595 data: 0.0742 max mem: 9377 +Train: [37] [5000/6250] eta: 0:03:22 lr: 0.000092 grad: 0.1616 (0.1680) loss: 0.6957 (0.7013) time: 0.1583 data: 0.0685 max mem: 9377 +Train: [37] [5100/6250] eta: 0:03:06 lr: 0.000092 grad: 0.1531 (0.1678) loss: 0.7141 (0.7013) time: 0.1680 data: 0.0760 max mem: 9377 +Train: [37] [5200/6250] eta: 0:02:49 lr: 0.000092 grad: 0.1524 (0.1676) loss: 0.6937 (0.7013) time: 0.1306 data: 0.0374 max mem: 9377 +Train: [37] [5300/6250] eta: 0:02:33 lr: 0.000092 grad: 0.1618 (0.1674) loss: 0.6884 (0.7012) time: 0.1484 data: 0.0577 max mem: 9377 +Train: [37] [5400/6250] eta: 0:02:17 lr: 0.000092 grad: 0.1522 (0.1672) loss: 0.7032 (0.7011) time: 0.1525 data: 0.0660 max mem: 9377 +Train: [37] [5500/6250] eta: 0:02:01 lr: 0.000092 grad: 0.1626 (0.1671) loss: 0.6979 (0.7010) time: 0.1374 data: 0.0410 max mem: 9377 +Train: [37] [5600/6250] eta: 0:01:44 lr: 0.000092 grad: 0.1578 (0.1670) loss: 0.6931 (0.7010) time: 0.1767 data: 0.0899 max mem: 9377 +Train: [37] [5700/6250] eta: 0:01:28 lr: 0.000091 grad: 0.1602 (0.1668) loss: 0.6854 (0.7008) time: 0.1717 data: 0.0862 max mem: 9377 +Train: [37] [5800/6250] eta: 0:01:12 lr: 0.000091 grad: 0.1526 (0.1667) loss: 0.6841 (0.7007) time: 0.1513 data: 0.0653 max mem: 9377 +Train: [37] [5900/6250] eta: 0:00:56 lr: 0.000091 grad: 0.1582 (0.1666) loss: 0.6944 (0.7006) time: 0.1600 data: 0.0817 max mem: 9377 +Train: [37] [6000/6250] eta: 0:00:40 lr: 0.000091 grad: 0.1531 (0.1664) loss: 0.6901 (0.7005) time: 0.1583 data: 0.0713 max mem: 9377 +Train: [37] [6100/6250] eta: 0:00:24 lr: 0.000091 grad: 0.1674 (0.1663) loss: 0.6900 (0.7006) time: 0.1787 data: 0.0905 max mem: 9377 +Train: [37] [6200/6250] eta: 0:00:08 lr: 0.000091 grad: 0.1540 (0.1662) loss: 0.7015 (0.7006) time: 0.1463 data: 0.0568 max mem: 9377 +Train: [37] [6249/6250] eta: 0:00:00 lr: 0.000091 grad: 0.1560 (0.1661) loss: 0.6876 (0.7006) time: 0.1456 data: 0.0499 max mem: 9377 +Train: [37] Total time: 0:16:51 (0.1619 s / it) +Averaged stats: lr: 0.000091 grad: 0.1560 (0.1661) loss: 0.6876 (0.7006) +Eval (hcp-train-subset): [37] [ 0/62] eta: 0:05:23 loss: 0.8789 (0.8789) time: 5.2214 data: 5.1911 max mem: 9377 +Eval (hcp-train-subset): [37] [61/62] eta: 0:00:00 loss: 0.8851 (0.8866) time: 0.1556 data: 0.1255 max mem: 9377 +Eval (hcp-train-subset): [37] Total time: 0:00:14 (0.2349 s / it) +Averaged stats (hcp-train-subset): loss: 0.8851 (0.8866) +Eval (hcp-val): [37] [ 0/62] eta: 0:04:24 loss: 0.8893 (0.8893) time: 4.2706 data: 4.1980 max mem: 9377 +Eval (hcp-val): [37] [61/62] eta: 0:00:00 loss: 0.8832 (0.8853) time: 0.1284 data: 0.1033 max mem: 9377 +Eval (hcp-val): [37] Total time: 0:00:14 (0.2400 s / it) +Averaged stats (hcp-val): loss: 0.8832 (0.8853) +Eval (nsd-val): [37] [ 0/62] eta: 0:05:50 loss: 0.8566 (0.8566) time: 5.6456 data: 5.6114 max mem: 9377 +Eval (nsd-val): [37] [61/62] eta: 0:00:00 loss: 0.8655 (0.8679) time: 0.1207 data: 0.0956 max mem: 9377 +Eval (nsd-val): [37] Total time: 0:00:14 (0.2332 s / it) +Averaged stats (nsd-val): loss: 0.8655 (0.8679) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [38] [ 0/6250] eta: 7:17:35 lr: 0.000091 grad: 0.0927 (0.0927) loss: 0.8829 (0.8829) time: 4.2009 data: 3.9042 max mem: 9377 +Train: [38] [ 100/6250] eta: 0:22:46 lr: 0.000091 grad: 0.2473 (0.3021) loss: 0.7102 (0.7256) time: 0.1809 data: 0.0731 max mem: 9377 +Train: [38] [ 200/6250] eta: 0:19:52 lr: 0.000091 grad: 0.3832 (0.3136) loss: 0.6867 (0.7132) time: 0.1765 data: 0.0804 max mem: 9377 +Train: [38] [ 300/6250] eta: 0:18:24 lr: 0.000091 grad: 0.2541 (0.3008) loss: 0.6842 (0.7066) time: 0.1763 data: 0.0875 max mem: 9377 +Train: [38] [ 400/6250] eta: 0:17:33 lr: 0.000091 grad: 0.2294 (0.2841) loss: 0.6982 (0.7022) time: 0.1578 data: 0.0676 max mem: 9377 +Train: [38] [ 500/6250] eta: 0:17:10 lr: 0.000091 grad: 0.2247 (0.2740) loss: 0.6774 (0.6990) time: 0.1883 data: 0.0892 max mem: 9377 +Train: [38] [ 600/6250] eta: 0:16:53 lr: 0.000091 grad: 0.1719 (0.2606) loss: 0.6798 (0.6975) time: 0.1886 data: 0.1037 max mem: 9377 +Train: [38] [ 700/6250] eta: 0:16:27 lr: 0.000091 grad: 0.1680 (0.2479) loss: 0.6899 (0.6957) time: 0.1752 data: 0.0800 max mem: 9377 +Train: [38] [ 800/6250] eta: 0:16:10 lr: 0.000091 grad: 0.1697 (0.2382) loss: 0.6910 (0.6946) time: 0.1863 data: 0.0998 max mem: 9377 +Train: [38] [ 900/6250] eta: 0:15:45 lr: 0.000091 grad: 0.1558 (0.2299) loss: 0.6978 (0.6948) time: 0.1468 data: 0.0363 max mem: 9377 +Train: [38] [1000/6250] eta: 0:15:18 lr: 0.000091 grad: 0.1534 (0.2229) loss: 0.7016 (0.6948) time: 0.1606 data: 0.0704 max mem: 9377 +Train: [38] [1100/6250] eta: 0:14:48 lr: 0.000091 grad: 0.1558 (0.2168) loss: 0.6866 (0.6947) time: 0.1655 data: 0.0774 max mem: 9377 +Train: [38] [1200/6250] eta: 0:14:24 lr: 0.000091 grad: 0.1492 (0.2121) loss: 0.6927 (0.6943) time: 0.1549 data: 0.0578 max mem: 9377 +Train: [38] [1300/6250] eta: 0:14:00 lr: 0.000091 grad: 0.1547 (0.2078) loss: 0.6780 (0.6942) time: 0.1406 data: 0.0560 max mem: 9377 +Train: [38] [1400/6250] eta: 0:13:39 lr: 0.000091 grad: 0.1557 (0.2042) loss: 0.6775 (0.6940) time: 0.1667 data: 0.0809 max mem: 9377 +Train: [38] [1500/6250] eta: 0:13:18 lr: 0.000091 grad: 0.1557 (0.2013) loss: 0.6888 (0.6937) time: 0.1796 data: 0.1053 max mem: 9377 +Train: [38] [1600/6250] eta: 0:13:02 lr: 0.000091 grad: 0.1608 (0.1986) loss: 0.7000 (0.6938) time: 0.1445 data: 0.0608 max mem: 9377 +Train: [38] [1700/6250] eta: 0:12:45 lr: 0.000091 grad: 0.1632 (0.1962) loss: 0.6894 (0.6936) time: 0.1589 data: 0.0749 max mem: 9377 +Train: [38] [1800/6250] eta: 0:12:28 lr: 0.000091 grad: 0.1522 (0.1939) loss: 0.6900 (0.6937) time: 0.1621 data: 0.0789 max mem: 9377 +Train: [38] [1900/6250] eta: 0:12:16 lr: 0.000091 grad: 0.1526 (0.1920) loss: 0.6969 (0.6936) time: 0.1680 data: 0.0759 max mem: 9377 +Train: [38] [2000/6250] eta: 0:11:58 lr: 0.000091 grad: 0.1558 (0.1903) loss: 0.6829 (0.6938) time: 0.1387 data: 0.0310 max mem: 9377 +Train: [38] [2100/6250] eta: 0:11:40 lr: 0.000091 grad: 0.1555 (0.1888) loss: 0.6885 (0.6939) time: 0.1537 data: 0.0581 max mem: 9377 +Train: [38] [2200/6250] eta: 0:11:20 lr: 0.000091 grad: 0.1499 (0.1872) loss: 0.6970 (0.6942) time: 0.1714 data: 0.0840 max mem: 9377 +Train: [38] [2300/6250] eta: 0:11:02 lr: 0.000091 grad: 0.1560 (0.1859) loss: 0.7030 (0.6945) time: 0.1782 data: 0.0972 max mem: 9377 +Train: [38] [2400/6250] eta: 0:10:44 lr: 0.000091 grad: 0.1509 (0.1846) loss: 0.6960 (0.6948) time: 0.1919 data: 0.1096 max mem: 9377 +Train: [38] [2500/6250] eta: 0:10:26 lr: 0.000091 grad: 0.1604 (0.1837) loss: 0.6982 (0.6949) time: 0.1712 data: 0.0823 max mem: 9377 +Train: [38] [2600/6250] eta: 0:10:11 lr: 0.000091 grad: 0.1565 (0.1827) loss: 0.7112 (0.6952) time: 0.2162 data: 0.1381 max mem: 9377 +Train: [38] [2700/6250] eta: 0:09:52 lr: 0.000091 grad: 0.1567 (0.1816) loss: 0.7018 (0.6954) time: 0.1538 data: 0.0703 max mem: 9377 +Train: [38] [2800/6250] eta: 0:09:34 lr: 0.000091 grad: 0.1524 (0.1808) loss: 0.6813 (0.6954) time: 0.1470 data: 0.0616 max mem: 9377 +Train: [38] [2900/6250] eta: 0:09:16 lr: 0.000090 grad: 0.1572 (0.1799) loss: 0.6891 (0.6956) time: 0.1584 data: 0.0802 max mem: 9377 +Train: [38] [3000/6250] eta: 0:09:00 lr: 0.000090 grad: 0.1523 (0.1791) loss: 0.6943 (0.6957) time: 0.1753 data: 0.0846 max mem: 9377 +Train: [38] [3100/6250] eta: 0:08:43 lr: 0.000090 grad: 0.1483 (0.1783) loss: 0.7120 (0.6959) time: 0.2066 data: 0.1209 max mem: 9377 +Train: [38] [3200/6250] eta: 0:08:24 lr: 0.000090 grad: 0.1515 (0.1776) loss: 0.6839 (0.6961) time: 0.1395 data: 0.0451 max mem: 9377 +Train: [38] [3300/6250] eta: 0:08:06 lr: 0.000090 grad: 0.1497 (0.1770) loss: 0.6887 (0.6963) time: 0.1447 data: 0.0501 max mem: 9377 +Train: [38] [3400/6250] eta: 0:07:49 lr: 0.000090 grad: 0.1565 (0.1764) loss: 0.6995 (0.6964) time: 0.1603 data: 0.0702 max mem: 9377 +Train: [38] [3500/6250] eta: 0:07:31 lr: 0.000090 grad: 0.1517 (0.1758) loss: 0.7099 (0.6965) time: 0.1597 data: 0.0679 max mem: 9377 +Train: [38] [3600/6250] eta: 0:07:14 lr: 0.000090 grad: 0.1562 (0.1754) loss: 0.6971 (0.6966) time: 0.1459 data: 0.0603 max mem: 9377 +Train: [38] [3700/6250] eta: 0:06:56 lr: 0.000090 grad: 0.1541 (0.1749) loss: 0.7015 (0.6966) time: 0.1485 data: 0.0574 max mem: 9377 +Train: [38] [3800/6250] eta: 0:06:39 lr: 0.000090 grad: 0.1504 (0.1744) loss: 0.7127 (0.6967) time: 0.1343 data: 0.0510 max mem: 9377 +Train: [38] [3900/6250] eta: 0:06:23 lr: 0.000090 grad: 0.1583 (0.1741) loss: 0.6986 (0.6968) time: 0.1440 data: 0.0511 max mem: 9377 +Train: [38] [4000/6250] eta: 0:06:06 lr: 0.000090 grad: 0.1501 (0.1736) loss: 0.6959 (0.6969) time: 0.1441 data: 0.0487 max mem: 9377 +Train: [38] [4100/6250] eta: 0:05:49 lr: 0.000090 grad: 0.1501 (0.1733) loss: 0.7048 (0.6967) time: 0.1522 data: 0.0693 max mem: 9377 +Train: [38] [4200/6250] eta: 0:05:32 lr: 0.000090 grad: 0.1571 (0.1728) loss: 0.7068 (0.6968) time: 0.1584 data: 0.0718 max mem: 9377 +Train: [38] [4300/6250] eta: 0:05:16 lr: 0.000090 grad: 0.1579 (0.1725) loss: 0.7011 (0.6968) time: 0.1510 data: 0.0665 max mem: 9377 +Train: [38] [4400/6250] eta: 0:04:59 lr: 0.000090 grad: 0.1556 (0.1722) loss: 0.6957 (0.6967) time: 0.1526 data: 0.0558 max mem: 9377 +Train: [38] [4500/6250] eta: 0:04:43 lr: 0.000090 grad: 0.1561 (0.1719) loss: 0.6876 (0.6966) time: 0.1268 data: 0.0401 max mem: 9377 +Train: [38] [4600/6250] eta: 0:04:26 lr: 0.000090 grad: 0.1594 (0.1716) loss: 0.6901 (0.6966) time: 0.1728 data: 0.0858 max mem: 9377 +Train: [38] [4700/6250] eta: 0:04:10 lr: 0.000090 grad: 0.1546 (0.1714) loss: 0.6910 (0.6965) time: 0.1532 data: 0.0691 max mem: 9377 +Train: [38] [4800/6250] eta: 0:03:53 lr: 0.000090 grad: 0.1548 (0.1712) loss: 0.6888 (0.6964) time: 0.1337 data: 0.0433 max mem: 9377 +Train: [38] [4900/6250] eta: 0:03:37 lr: 0.000090 grad: 0.1601 (0.1709) loss: 0.6838 (0.6964) time: 0.1361 data: 0.0444 max mem: 9377 +Train: [38] [5000/6250] eta: 0:03:21 lr: 0.000090 grad: 0.1616 (0.1707) loss: 0.6880 (0.6964) time: 0.1477 data: 0.0517 max mem: 9377 +Train: [38] [5100/6250] eta: 0:03:05 lr: 0.000090 grad: 0.1580 (0.1706) loss: 0.7021 (0.6963) time: 0.2018 data: 0.1156 max mem: 9377 +Train: [38] [5200/6250] eta: 0:02:49 lr: 0.000090 grad: 0.1582 (0.1703) loss: 0.6943 (0.6964) time: 0.1595 data: 0.0684 max mem: 9377 +Train: [38] [5300/6250] eta: 0:02:32 lr: 0.000090 grad: 0.1533 (0.1700) loss: 0.6903 (0.6964) time: 0.1515 data: 0.0664 max mem: 9377 +Train: [38] [5400/6250] eta: 0:02:16 lr: 0.000090 grad: 0.1561 (0.1698) loss: 0.6867 (0.6965) time: 0.1525 data: 0.0695 max mem: 9377 +Train: [38] [5500/6250] eta: 0:02:00 lr: 0.000090 grad: 0.1546 (0.1696) loss: 0.7068 (0.6965) time: 0.1158 data: 0.0267 max mem: 9377 +Train: [38] [5600/6250] eta: 0:01:44 lr: 0.000090 grad: 0.1629 (0.1696) loss: 0.6968 (0.6965) time: 0.1400 data: 0.0498 max mem: 9377 +Train: [38] [5700/6250] eta: 0:01:28 lr: 0.000090 grad: 0.1587 (0.1694) loss: 0.7025 (0.6965) time: 0.1736 data: 0.0836 max mem: 9377 +Train: [38] [5800/6250] eta: 0:01:12 lr: 0.000090 grad: 0.1570 (0.1693) loss: 0.6792 (0.6964) time: 0.1589 data: 0.0733 max mem: 9377 +Train: [38] [5900/6250] eta: 0:00:56 lr: 0.000090 grad: 0.1614 (0.1691) loss: 0.6889 (0.6965) time: 0.1613 data: 0.0821 max mem: 9377 +Train: [38] [6000/6250] eta: 0:00:40 lr: 0.000090 grad: 0.1576 (0.1689) loss: 0.6879 (0.6964) time: 0.1632 data: 0.0774 max mem: 9377 +Train: [38] [6100/6250] eta: 0:00:24 lr: 0.000090 grad: 0.1594 (0.1688) loss: 0.6938 (0.6964) time: 0.1472 data: 0.0527 max mem: 9377 +Train: [38] [6200/6250] eta: 0:00:08 lr: 0.000089 grad: 0.1748 (0.1688) loss: 0.6889 (0.6964) time: 0.1701 data: 0.0900 max mem: 9377 +Train: [38] [6249/6250] eta: 0:00:00 lr: 0.000089 grad: 0.1601 (0.1688) loss: 0.6923 (0.6964) time: 0.1408 data: 0.0528 max mem: 9377 +Train: [38] Total time: 0:16:47 (0.1611 s / it) +Averaged stats: lr: 0.000089 grad: 0.1601 (0.1688) loss: 0.6923 (0.6964) +Eval (hcp-train-subset): [38] [ 0/62] eta: 0:03:59 loss: 0.8749 (0.8749) time: 3.8585 data: 3.7741 max mem: 9377 +Eval (hcp-train-subset): [38] [61/62] eta: 0:00:00 loss: 0.8837 (0.8851) time: 0.1529 data: 0.1260 max mem: 9377 +Eval (hcp-train-subset): [38] Total time: 0:00:14 (0.2394 s / it) +Averaged stats (hcp-train-subset): loss: 0.8837 (0.8851) +Eval (hcp-val): [38] [ 0/62] eta: 0:05:40 loss: 0.8903 (0.8903) time: 5.4851 data: 5.4391 max mem: 9377 +Eval (hcp-val): [38] [61/62] eta: 0:00:00 loss: 0.8799 (0.8850) time: 0.1502 data: 0.1232 max mem: 9377 +Eval (hcp-val): [38] Total time: 0:00:15 (0.2535 s / it) +Averaged stats (hcp-val): loss: 0.8799 (0.8850) +Eval (nsd-val): [38] [ 0/62] eta: 0:06:34 loss: 0.8599 (0.8599) time: 6.3709 data: 6.3387 max mem: 9377 +Eval (nsd-val): [38] [61/62] eta: 0:00:00 loss: 0.8643 (0.8636) time: 0.1622 data: 0.1346 max mem: 9377 +Eval (nsd-val): [38] Total time: 0:00:15 (0.2520 s / it) +Averaged stats (nsd-val): loss: 0.8643 (0.8636) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [39] [ 0/6250] eta: 13:16:23 lr: 0.000089 grad: 0.1268 (0.1268) loss: 0.8792 (0.8792) time: 7.6454 data: 7.5236 max mem: 9377 +Train: [39] [ 100/6250] eta: 0:25:40 lr: 0.000089 grad: 0.3890 (0.4309) loss: 0.7053 (0.7101) time: 0.1545 data: 0.0253 max mem: 9377 +Train: [39] [ 200/6250] eta: 0:21:51 lr: 0.000089 grad: 0.2484 (0.3828) loss: 0.7155 (0.7028) time: 0.1724 data: 0.0720 max mem: 9377 +Train: [39] [ 300/6250] eta: 0:20:23 lr: 0.000089 grad: 0.2738 (0.3492) loss: 0.6937 (0.7014) time: 0.1992 data: 0.1013 max mem: 9377 +Train: [39] [ 400/6250] eta: 0:19:07 lr: 0.000089 grad: 0.2069 (0.3189) loss: 0.6998 (0.7017) time: 0.2126 data: 0.1136 max mem: 9377 +Train: [39] [ 500/6250] eta: 0:18:08 lr: 0.000089 grad: 0.1957 (0.2988) loss: 0.6999 (0.7012) time: 0.1502 data: 0.0646 max mem: 9377 +Train: [39] [ 600/6250] eta: 0:17:35 lr: 0.000089 grad: 0.1756 (0.2796) loss: 0.7106 (0.7020) time: 0.1459 data: 0.0565 max mem: 9377 +Train: [39] [ 700/6250] eta: 0:16:55 lr: 0.000089 grad: 0.1765 (0.2645) loss: 0.6972 (0.7021) time: 0.1543 data: 0.0677 max mem: 9377 +Train: [39] [ 800/6250] eta: 0:16:34 lr: 0.000089 grad: 0.1662 (0.2528) loss: 0.6801 (0.7014) time: 0.1682 data: 0.0809 max mem: 9377 +Train: [39] [ 900/6250] eta: 0:16:12 lr: 0.000089 grad: 0.1609 (0.2435) loss: 0.6987 (0.7007) time: 0.1844 data: 0.0962 max mem: 9377 +Train: [39] [1000/6250] eta: 0:15:38 lr: 0.000089 grad: 0.1690 (0.2368) loss: 0.7060 (0.7008) time: 0.1331 data: 0.0356 max mem: 9377 +Train: [39] [1100/6250] eta: 0:15:06 lr: 0.000089 grad: 0.1550 (0.2301) loss: 0.6931 (0.7003) time: 0.1405 data: 0.0412 max mem: 9377 +Train: [39] [1200/6250] eta: 0:14:37 lr: 0.000089 grad: 0.1537 (0.2241) loss: 0.6974 (0.7001) time: 0.1525 data: 0.0619 max mem: 9377 +Train: [39] [1300/6250] eta: 0:14:14 lr: 0.000089 grad: 0.1577 (0.2188) loss: 0.7010 (0.7003) time: 0.1467 data: 0.0584 max mem: 9377 +Train: [39] [1400/6250] eta: 0:13:52 lr: 0.000089 grad: 0.1516 (0.2143) loss: 0.7004 (0.7002) time: 0.1434 data: 0.0503 max mem: 9377 +Train: [39] [1500/6250] eta: 0:13:31 lr: 0.000089 grad: 0.1611 (0.2105) loss: 0.6855 (0.7003) time: 0.1600 data: 0.0646 max mem: 9377 +Train: [39] [1600/6250] eta: 0:13:13 lr: 0.000089 grad: 0.1484 (0.2072) loss: 0.7060 (0.7007) time: 0.1636 data: 0.0811 max mem: 9377 +Train: [39] [1700/6250] eta: 0:12:56 lr: 0.000089 grad: 0.1547 (0.2042) loss: 0.7061 (0.7007) time: 0.1500 data: 0.0543 max mem: 9377 +Train: [39] [1800/6250] eta: 0:12:37 lr: 0.000089 grad: 0.1525 (0.2015) loss: 0.6903 (0.7005) time: 0.1789 data: 0.0944 max mem: 9377 +Train: [39] [1900/6250] eta: 0:12:19 lr: 0.000089 grad: 0.1564 (0.1991) loss: 0.6865 (0.7002) time: 0.1460 data: 0.0571 max mem: 9377 +Train: [39] [2000/6250] eta: 0:11:59 lr: 0.000089 grad: 0.1521 (0.1969) loss: 0.6939 (0.6999) time: 0.1517 data: 0.0646 max mem: 9377 +Train: [39] [2100/6250] eta: 0:11:41 lr: 0.000089 grad: 0.1547 (0.1949) loss: 0.7044 (0.6998) time: 0.1539 data: 0.0671 max mem: 9377 +Train: [39] [2200/6250] eta: 0:11:23 lr: 0.000089 grad: 0.1603 (0.1932) loss: 0.6935 (0.6995) time: 0.1537 data: 0.0616 max mem: 9377 +Train: [39] [2300/6250] eta: 0:11:03 lr: 0.000089 grad: 0.1573 (0.1915) loss: 0.6901 (0.6996) time: 0.1485 data: 0.0554 max mem: 9377 +Train: [39] [2400/6250] eta: 0:10:44 lr: 0.000089 grad: 0.1532 (0.1902) loss: 0.6979 (0.6995) time: 0.1513 data: 0.0583 max mem: 9377 +Train: [39] [2500/6250] eta: 0:10:25 lr: 0.000089 grad: 0.1531 (0.1890) loss: 0.6918 (0.6992) time: 0.1759 data: 0.0869 max mem: 9377 +Train: [39] [2600/6250] eta: 0:10:06 lr: 0.000089 grad: 0.1561 (0.1877) loss: 0.7079 (0.6992) time: 0.1601 data: 0.0748 max mem: 9377 +Train: [39] [2700/6250] eta: 0:09:50 lr: 0.000089 grad: 0.1580 (0.1867) loss: 0.6911 (0.6989) time: 0.1506 data: 0.0585 max mem: 9377 +Train: [39] [2800/6250] eta: 0:09:33 lr: 0.000089 grad: 0.1587 (0.1858) loss: 0.6812 (0.6985) time: 0.1700 data: 0.0793 max mem: 9377 +Train: [39] [2900/6250] eta: 0:09:16 lr: 0.000089 grad: 0.1664 (0.1849) loss: 0.6892 (0.6982) time: 0.1471 data: 0.0624 max mem: 9377 +Train: [39] [3000/6250] eta: 0:08:59 lr: 0.000089 grad: 0.1606 (0.1843) loss: 0.6769 (0.6975) time: 0.1478 data: 0.0560 max mem: 9377 +Train: [39] [3100/6250] eta: 0:08:45 lr: 0.000089 grad: 0.1617 (0.1837) loss: 0.6777 (0.6971) time: 0.1677 data: 0.0742 max mem: 9377 +Train: [39] [3200/6250] eta: 0:08:29 lr: 0.000089 grad: 0.1534 (0.1829) loss: 0.6909 (0.6968) time: 0.1420 data: 0.0424 max mem: 9377 +Train: [39] [3300/6250] eta: 0:08:13 lr: 0.000088 grad: 0.1548 (0.1822) loss: 0.6897 (0.6964) time: 0.1611 data: 0.0660 max mem: 9377 +Train: [39] [3400/6250] eta: 0:07:56 lr: 0.000088 grad: 0.1551 (0.1815) loss: 0.6682 (0.6960) time: 0.1505 data: 0.0683 max mem: 9377 +Train: [39] [3500/6250] eta: 0:07:38 lr: 0.000088 grad: 0.1613 (0.1810) loss: 0.6833 (0.6956) time: 0.1552 data: 0.0703 max mem: 9377 +Train: [39] [3600/6250] eta: 0:07:21 lr: 0.000088 grad: 0.1601 (0.1803) loss: 0.6813 (0.6952) time: 0.1489 data: 0.0647 max mem: 9377 +Train: [39] [3700/6250] eta: 0:07:04 lr: 0.000088 grad: 0.1641 (0.1798) loss: 0.6855 (0.6949) time: 0.1436 data: 0.0500 max mem: 9377 +Train: [39] [3800/6250] eta: 0:06:47 lr: 0.000088 grad: 0.1624 (0.1794) loss: 0.6799 (0.6946) time: 0.1529 data: 0.0595 max mem: 9377 +Train: [39] [3900/6250] eta: 0:06:30 lr: 0.000088 grad: 0.1505 (0.1790) loss: 0.6880 (0.6944) time: 0.1846 data: 0.0853 max mem: 9377 +Train: [39] [4000/6250] eta: 0:06:13 lr: 0.000088 grad: 0.1571 (0.1786) loss: 0.6828 (0.6943) time: 0.1459 data: 0.0635 max mem: 9377 +Train: [39] [4100/6250] eta: 0:05:56 lr: 0.000088 grad: 0.1583 (0.1781) loss: 0.7049 (0.6943) time: 0.1624 data: 0.0772 max mem: 9377 +Train: [39] [4200/6250] eta: 0:05:39 lr: 0.000088 grad: 0.1585 (0.1776) loss: 0.6840 (0.6944) time: 0.1535 data: 0.0578 max mem: 9377 +Train: [39] [4300/6250] eta: 0:05:22 lr: 0.000088 grad: 0.1646 (0.1772) loss: 0.6709 (0.6941) time: 0.1255 data: 0.0438 max mem: 9377 +Train: [39] [4400/6250] eta: 0:05:05 lr: 0.000088 grad: 0.1554 (0.1770) loss: 0.6992 (0.6939) time: 0.1647 data: 0.0772 max mem: 9377 +Train: [39] [4500/6250] eta: 0:04:48 lr: 0.000088 grad: 0.1657 (0.1767) loss: 0.6750 (0.6936) time: 0.1241 data: 0.0277 max mem: 9377 +Train: [39] [4600/6250] eta: 0:04:31 lr: 0.000088 grad: 0.1549 (0.1763) loss: 0.6893 (0.6934) time: 0.1110 data: 0.0281 max mem: 9377 +Train: [39] [4700/6250] eta: 0:04:15 lr: 0.000088 grad: 0.1545 (0.1761) loss: 0.6810 (0.6932) time: 0.1529 data: 0.0571 max mem: 9377 +Train: [39] [4800/6250] eta: 0:03:58 lr: 0.000088 grad: 0.1606 (0.1759) loss: 0.6819 (0.6931) time: 0.1528 data: 0.0690 max mem: 9377 +Train: [39] [4900/6250] eta: 0:03:41 lr: 0.000088 grad: 0.1550 (0.1755) loss: 0.6937 (0.6929) time: 0.1545 data: 0.0679 max mem: 9377 +Train: [39] [5000/6250] eta: 0:03:25 lr: 0.000088 grad: 0.1589 (0.1753) loss: 0.7001 (0.6928) time: 0.1569 data: 0.0708 max mem: 9377 +Train: [39] [5100/6250] eta: 0:03:08 lr: 0.000088 grad: 0.1593 (0.1750) loss: 0.6902 (0.6927) time: 0.1576 data: 0.0683 max mem: 9377 +Train: [39] [5200/6250] eta: 0:02:52 lr: 0.000088 grad: 0.1614 (0.1748) loss: 0.6777 (0.6927) time: 0.1811 data: 0.0988 max mem: 9377 +Train: [39] [5300/6250] eta: 0:02:35 lr: 0.000088 grad: 0.1689 (0.1746) loss: 0.6666 (0.6924) time: 0.1607 data: 0.0735 max mem: 9377 +Train: [39] [5400/6250] eta: 0:02:18 lr: 0.000088 grad: 0.1570 (0.1745) loss: 0.6833 (0.6923) time: 0.1617 data: 0.0771 max mem: 9377 +Train: [39] [5500/6250] eta: 0:02:02 lr: 0.000088 grad: 0.1663 (0.1743) loss: 0.6781 (0.6921) time: 0.1532 data: 0.0636 max mem: 9377 +Train: [39] [5600/6250] eta: 0:01:46 lr: 0.000088 grad: 0.1591 (0.1741) loss: 0.6947 (0.6920) time: 0.1615 data: 0.0688 max mem: 9377 +Train: [39] [5700/6250] eta: 0:01:29 lr: 0.000088 grad: 0.1661 (0.1739) loss: 0.6992 (0.6920) time: 0.1663 data: 0.0684 max mem: 9377 +Train: [39] [5800/6250] eta: 0:01:13 lr: 0.000088 grad: 0.1598 (0.1737) loss: 0.6871 (0.6920) time: 0.1602 data: 0.0667 max mem: 9377 +Train: [39] [5900/6250] eta: 0:00:57 lr: 0.000088 grad: 0.1645 (0.1735) loss: 0.6941 (0.6919) time: 0.1503 data: 0.0609 max mem: 9377 +Train: [39] [6000/6250] eta: 0:00:40 lr: 0.000088 grad: 0.1612 (0.1734) loss: 0.6899 (0.6918) time: 0.1579 data: 0.0676 max mem: 9377 +Train: [39] [6100/6250] eta: 0:00:24 lr: 0.000088 grad: 0.1623 (0.1732) loss: 0.7001 (0.6918) time: 0.1512 data: 0.0593 max mem: 9377 +Train: [39] [6200/6250] eta: 0:00:08 lr: 0.000088 grad: 0.1615 (0.1731) loss: 0.6760 (0.6918) time: 0.1656 data: 0.0785 max mem: 9377 +Train: [39] [6249/6250] eta: 0:00:00 lr: 0.000088 grad: 0.1519 (0.1730) loss: 0.6989 (0.6918) time: 0.1424 data: 0.0501 max mem: 9377 +Train: [39] Total time: 0:17:03 (0.1637 s / it) +Averaged stats: lr: 0.000088 grad: 0.1519 (0.1730) loss: 0.6989 (0.6918) +Eval (hcp-train-subset): [39] [ 0/62] eta: 0:05:33 loss: 0.8770 (0.8770) time: 5.3744 data: 5.3373 max mem: 9377 +Eval (hcp-train-subset): [39] [61/62] eta: 0:00:00 loss: 0.8844 (0.8853) time: 0.1422 data: 0.1171 max mem: 9377 +Eval (hcp-train-subset): [39] Total time: 0:00:14 (0.2366 s / it) +Averaged stats (hcp-train-subset): loss: 0.8844 (0.8853) +Making plots (hcp-train-subset): example=3 +Eval (hcp-val): [39] [ 0/62] eta: 0:04:12 loss: 0.8921 (0.8921) time: 4.0702 data: 3.9659 max mem: 9377 +Eval (hcp-val): [39] [61/62] eta: 0:00:00 loss: 0.8815 (0.8844) time: 0.1449 data: 0.1199 max mem: 9377 +Eval (hcp-val): [39] Total time: 0:00:14 (0.2335 s / it) +Averaged stats (hcp-val): loss: 0.8815 (0.8844) +Making plots (hcp-val): example=24 +Eval (nsd-val): [39] [ 0/62] eta: 0:03:55 loss: 0.8551 (0.8551) time: 3.7904 data: 3.6865 max mem: 9377 +Eval (nsd-val): [39] [61/62] eta: 0:00:00 loss: 0.8632 (0.8635) time: 0.1407 data: 0.1152 max mem: 9377 +Eval (nsd-val): [39] Total time: 0:00:14 (0.2344 s / it) +Averaged stats (nsd-val): loss: 0.8632 (0.8635) +Making plots (nsd-val): example=33 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-00039.pth +Train: [40] [ 0/6250] eta: 8:08:02 lr: 0.000088 grad: 0.1286 (0.1286) loss: 0.8552 (0.8552) time: 4.6852 data: 4.3378 max mem: 9377 +Train: [40] [ 100/6250] eta: 0:22:21 lr: 0.000088 grad: 0.3089 (0.3341) loss: 0.6896 (0.7247) time: 0.1453 data: 0.0301 max mem: 9377 +Train: [40] [ 200/6250] eta: 0:19:47 lr: 0.000088 grad: 0.3258 (0.3461) loss: 0.7036 (0.7117) time: 0.1638 data: 0.0513 max mem: 9377 +Train: [40] [ 300/6250] eta: 0:18:09 lr: 0.000088 grad: 0.2384 (0.3287) loss: 0.7043 (0.7084) time: 0.1573 data: 0.0523 max mem: 9377 +Train: [40] [ 400/6250] eta: 0:17:52 lr: 0.000087 grad: 0.2519 (0.3082) loss: 0.7037 (0.7080) time: 0.1918 data: 0.1002 max mem: 9377 +Train: [40] [ 500/6250] eta: 0:17:34 lr: 0.000087 grad: 0.2184 (0.2924) loss: 0.6892 (0.7074) time: 0.1305 data: 0.0345 max mem: 9377 +Train: [40] [ 600/6250] eta: 0:17:20 lr: 0.000087 grad: 0.2045 (0.2820) loss: 0.7154 (0.7074) time: 0.1953 data: 0.1031 max mem: 9377 +Train: [40] [ 700/6250] eta: 0:16:49 lr: 0.000087 grad: 0.1828 (0.2677) loss: 0.7078 (0.7079) time: 0.1513 data: 0.0539 max mem: 9377 +Train: [40] [ 800/6250] eta: 0:16:31 lr: 0.000087 grad: 0.1957 (0.2590) loss: 0.7018 (0.7077) time: 0.1756 data: 0.0845 max mem: 9377 +Train: [40] [ 900/6250] eta: 0:16:16 lr: 0.000087 grad: 0.1697 (0.2504) loss: 0.7048 (0.7076) time: 0.1877 data: 0.0904 max mem: 9377 +Train: [40] [1000/6250] eta: 0:15:55 lr: 0.000087 grad: 0.1629 (0.2418) loss: 0.6902 (0.7066) time: 0.1856 data: 0.0819 max mem: 9377 +Train: [40] [1100/6250] eta: 0:15:25 lr: 0.000087 grad: 0.1605 (0.2345) loss: 0.7033 (0.7060) time: 0.1399 data: 0.0450 max mem: 9377 +Train: [40] [1200/6250] eta: 0:14:56 lr: 0.000087 grad: 0.1571 (0.2284) loss: 0.6845 (0.7054) time: 0.1343 data: 0.0313 max mem: 9377 +Train: [40] [1300/6250] eta: 0:14:30 lr: 0.000087 grad: 0.1525 (0.2230) loss: 0.6908 (0.7050) time: 0.1586 data: 0.0662 max mem: 9377 +Train: [40] [1400/6250] eta: 0:14:07 lr: 0.000087 grad: 0.1540 (0.2180) loss: 0.7004 (0.7051) time: 0.1543 data: 0.0675 max mem: 9377 +Train: [40] [1500/6250] eta: 0:13:45 lr: 0.000087 grad: 0.1486 (0.2138) loss: 0.7040 (0.7051) time: 0.1514 data: 0.0673 max mem: 9377 +Train: [40] [1600/6250] eta: 0:13:31 lr: 0.000087 grad: 0.1597 (0.2101) loss: 0.7115 (0.7053) time: 0.1725 data: 0.0869 max mem: 9377 +Train: [40] [1700/6250] eta: 0:13:12 lr: 0.000087 grad: 0.1553 (0.2070) loss: 0.7136 (0.7056) time: 0.1528 data: 0.0625 max mem: 9377 +Train: [40] [1800/6250] eta: 0:12:52 lr: 0.000087 grad: 0.1490 (0.2040) loss: 0.6991 (0.7057) time: 0.1556 data: 0.0681 max mem: 9377 +Train: [40] [1900/6250] eta: 0:12:30 lr: 0.000087 grad: 0.1534 (0.2015) loss: 0.6922 (0.7055) time: 0.1501 data: 0.0735 max mem: 9377 +Train: [40] [2000/6250] eta: 0:12:11 lr: 0.000087 grad: 0.1590 (0.1994) loss: 0.7079 (0.7054) time: 0.1759 data: 0.0950 max mem: 9377 +Train: [40] [2100/6250] eta: 0:11:52 lr: 0.000087 grad: 0.1583 (0.1974) loss: 0.7071 (0.7052) time: 0.1592 data: 0.0712 max mem: 9377 +Train: [40] [2200/6250] eta: 0:11:32 lr: 0.000087 grad: 0.1547 (0.1955) loss: 0.7105 (0.7052) time: 0.1462 data: 0.0514 max mem: 9377 +Train: [40] [2300/6250] eta: 0:11:12 lr: 0.000087 grad: 0.1633 (0.1941) loss: 0.6960 (0.7047) time: 0.1588 data: 0.0569 max mem: 9377 +Train: [40] [2400/6250] eta: 0:10:53 lr: 0.000087 grad: 0.1615 (0.1926) loss: 0.6894 (0.7044) time: 0.1840 data: 0.0906 max mem: 9377 +Train: [40] [2500/6250] eta: 0:10:34 lr: 0.000087 grad: 0.1558 (0.1913) loss: 0.6915 (0.7040) time: 0.1361 data: 0.0471 max mem: 9377 +Train: [40] [2600/6250] eta: 0:10:16 lr: 0.000087 grad: 0.1580 (0.1900) loss: 0.6920 (0.7035) time: 0.1643 data: 0.0726 max mem: 9377 +Train: [40] [2700/6250] eta: 0:10:00 lr: 0.000087 grad: 0.1598 (0.1889) loss: 0.6908 (0.7031) time: 0.1701 data: 0.0847 max mem: 9377 +Train: [40] [2800/6250] eta: 0:09:43 lr: 0.000087 grad: 0.1536 (0.1880) loss: 0.6919 (0.7027) time: 0.1969 data: 0.1137 max mem: 9377 +Train: [40] [2900/6250] eta: 0:09:26 lr: 0.000087 grad: 0.1554 (0.1871) loss: 0.7012 (0.7024) time: 0.1643 data: 0.0811 max mem: 9377 +Train: [40] [3000/6250] eta: 0:09:07 lr: 0.000087 grad: 0.1581 (0.1862) loss: 0.6905 (0.7020) time: 0.1527 data: 0.0694 max mem: 9377 +Train: [40] [3100/6250] eta: 0:08:50 lr: 0.000087 grad: 0.1670 (0.1855) loss: 0.6885 (0.7018) time: 0.1721 data: 0.0834 max mem: 9377 +Train: [40] [3200/6250] eta: 0:08:33 lr: 0.000087 grad: 0.1532 (0.1847) loss: 0.6978 (0.7015) time: 0.1694 data: 0.0804 max mem: 9377 +Train: [40] [3300/6250] eta: 0:08:16 lr: 0.000087 grad: 0.1538 (0.1839) loss: 0.6974 (0.7012) time: 0.1515 data: 0.0606 max mem: 9377 +Train: [40] [3400/6250] eta: 0:07:59 lr: 0.000087 grad: 0.1664 (0.1833) loss: 0.6919 (0.7008) time: 0.1450 data: 0.0477 max mem: 9377 +Train: [40] [3500/6250] eta: 0:07:42 lr: 0.000087 grad: 0.1591 (0.1827) loss: 0.6795 (0.7005) time: 0.1516 data: 0.0668 max mem: 9377 +Train: [40] [3600/6250] eta: 0:07:24 lr: 0.000087 grad: 0.1596 (0.1822) loss: 0.6724 (0.7001) time: 0.1713 data: 0.0810 max mem: 9377 +Train: [40] [3700/6250] eta: 0:07:07 lr: 0.000086 grad: 0.1599 (0.1817) loss: 0.6742 (0.6997) time: 0.1509 data: 0.0617 max mem: 9377 +Train: [40] [3800/6250] eta: 0:06:49 lr: 0.000086 grad: 0.1565 (0.1811) loss: 0.6843 (0.6993) time: 0.1490 data: 0.0686 max mem: 9377 +Train: [40] [3900/6250] eta: 0:06:33 lr: 0.000086 grad: 0.1584 (0.1806) loss: 0.6870 (0.6989) time: 0.1328 data: 0.0473 max mem: 9377 +Train: [40] [4000/6250] eta: 0:06:15 lr: 0.000086 grad: 0.1592 (0.1801) loss: 0.6882 (0.6987) time: 0.1479 data: 0.0619 max mem: 9377 +Train: [40] [4100/6250] eta: 0:05:59 lr: 0.000086 grad: 0.1624 (0.1795) loss: 0.7019 (0.6985) time: 0.1653 data: 0.0858 max mem: 9377 +Train: [40] [4200/6250] eta: 0:05:42 lr: 0.000086 grad: 0.1631 (0.1791) loss: 0.6838 (0.6983) time: 0.1674 data: 0.0870 max mem: 9377 +Train: [40] [4300/6250] eta: 0:05:25 lr: 0.000086 grad: 0.1529 (0.1787) loss: 0.6889 (0.6981) time: 0.1496 data: 0.0694 max mem: 9377 +Train: [40] [4400/6250] eta: 0:05:07 lr: 0.000086 grad: 0.1567 (0.1784) loss: 0.6878 (0.6979) time: 0.1578 data: 0.0597 max mem: 9377 +Train: [40] [4500/6250] eta: 0:04:50 lr: 0.000086 grad: 0.1619 (0.1781) loss: 0.6780 (0.6976) time: 0.1469 data: 0.0597 max mem: 9377 +Train: [40] [4600/6250] eta: 0:04:33 lr: 0.000086 grad: 0.1615 (0.1778) loss: 0.6947 (0.6974) time: 0.1585 data: 0.0638 max mem: 9377 +Train: [40] [4700/6250] eta: 0:04:16 lr: 0.000086 grad: 0.1562 (0.1776) loss: 0.6873 (0.6973) time: 0.1832 data: 0.0988 max mem: 9377 +Train: [40] [4800/6250] eta: 0:03:59 lr: 0.000086 grad: 0.1586 (0.1773) loss: 0.6896 (0.6971) time: 0.1654 data: 0.0795 max mem: 9377 +Train: [40] [4900/6250] eta: 0:03:42 lr: 0.000086 grad: 0.1621 (0.1770) loss: 0.6945 (0.6970) time: 0.1557 data: 0.0703 max mem: 9377 +Train: [40] [5000/6250] eta: 0:03:26 lr: 0.000086 grad: 0.1635 (0.1767) loss: 0.6980 (0.6969) time: 0.1469 data: 0.0629 max mem: 9377 +Train: [40] [5100/6250] eta: 0:03:09 lr: 0.000086 grad: 0.1669 (0.1764) loss: 0.6867 (0.6967) time: 0.1660 data: 0.0788 max mem: 9377 +Train: [40] [5200/6250] eta: 0:02:53 lr: 0.000086 grad: 0.1581 (0.1762) loss: 0.6819 (0.6965) time: 0.1438 data: 0.0555 max mem: 9377 +Train: [40] [5300/6250] eta: 0:02:36 lr: 0.000086 grad: 0.1586 (0.1759) loss: 0.7005 (0.6964) time: 0.1421 data: 0.0580 max mem: 9377 +Train: [40] [5400/6250] eta: 0:02:19 lr: 0.000086 grad: 0.1660 (0.1758) loss: 0.6826 (0.6962) time: 0.1475 data: 0.0552 max mem: 9377 +Train: [40] [5500/6250] eta: 0:02:03 lr: 0.000086 grad: 0.1596 (0.1755) loss: 0.6798 (0.6960) time: 0.1524 data: 0.0666 max mem: 9377 +Train: [40] [5600/6250] eta: 0:01:46 lr: 0.000086 grad: 0.1619 (0.1753) loss: 0.7049 (0.6959) time: 0.2035 data: 0.1253 max mem: 9377 +Train: [40] [5700/6250] eta: 0:01:30 lr: 0.000086 grad: 0.1649 (0.1753) loss: 0.6763 (0.6957) time: 0.1422 data: 0.0640 max mem: 9377 +Train: [40] [5800/6250] eta: 0:01:13 lr: 0.000086 grad: 0.1569 (0.1751) loss: 0.6782 (0.6956) time: 0.1470 data: 0.0579 max mem: 9377 +Train: [40] [5900/6250] eta: 0:00:57 lr: 0.000086 grad: 0.1605 (0.1749) loss: 0.6983 (0.6955) time: 0.1505 data: 0.0643 max mem: 9377 +Train: [40] [6000/6250] eta: 0:00:41 lr: 0.000086 grad: 0.1526 (0.1746) loss: 0.7037 (0.6953) time: 0.1732 data: 0.0968 max mem: 9377 +Train: [40] [6100/6250] eta: 0:00:24 lr: 0.000086 grad: 0.1615 (0.1745) loss: 0.6871 (0.6952) time: 0.1517 data: 0.0650 max mem: 9377 +Train: [40] [6200/6250] eta: 0:00:08 lr: 0.000086 grad: 0.1647 (0.1743) loss: 0.6959 (0.6952) time: 0.1154 data: 0.0285 max mem: 9377 +Train: [40] [6249/6250] eta: 0:00:00 lr: 0.000086 grad: 0.1628 (0.1742) loss: 0.6877 (0.6951) time: 0.1435 data: 0.0619 max mem: 9377 +Train: [40] Total time: 0:17:07 (0.1645 s / it) +Averaged stats: lr: 0.000086 grad: 0.1628 (0.1742) loss: 0.6877 (0.6951) +Eval (hcp-train-subset): [40] [ 0/62] eta: 0:04:40 loss: 0.8731 (0.8731) time: 4.5186 data: 4.4368 max mem: 9377 +Eval (hcp-train-subset): [40] [61/62] eta: 0:00:00 loss: 0.8885 (0.8900) time: 0.1315 data: 0.1066 max mem: 9377 +Eval (hcp-train-subset): [40] Total time: 0:00:14 (0.2365 s / it) +Averaged stats (hcp-train-subset): loss: 0.8885 (0.8900) +Eval (hcp-val): [40] [ 0/62] eta: 0:05:50 loss: 0.9021 (0.9021) time: 5.6610 data: 5.6304 max mem: 9377 +Eval (hcp-val): [40] [61/62] eta: 0:00:00 loss: 0.8879 (0.8886) time: 0.1571 data: 0.1318 max mem: 9377 +Eval (hcp-val): [40] Total time: 0:00:15 (0.2458 s / it) +Averaged stats (hcp-val): loss: 0.8879 (0.8886) +Eval (nsd-val): [40] [ 0/62] eta: 0:05:33 loss: 0.8647 (0.8647) time: 5.3865 data: 5.3547 max mem: 9377 +Eval (nsd-val): [40] [61/62] eta: 0:00:00 loss: 0.8723 (0.8735) time: 0.1635 data: 0.1376 max mem: 9377 +Eval (nsd-val): [40] Total time: 0:00:14 (0.2389 s / it) +Averaged stats (nsd-val): loss: 0.8723 (0.8735) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [41] [ 0/6250] eta: 8:45:18 lr: 0.000086 grad: 0.1308 (0.1308) loss: 0.8257 (0.8257) time: 5.0430 data: 4.8519 max mem: 9377 +Train: [41] [ 100/6250] eta: 0:24:53 lr: 0.000086 grad: 0.2452 (0.2522) loss: 0.7727 (0.7752) time: 0.1759 data: 0.0885 max mem: 9377 +Train: [41] [ 200/6250] eta: 0:21:38 lr: 0.000086 grad: 0.3029 (0.2817) loss: 0.6890 (0.7449) time: 0.1826 data: 0.0855 max mem: 9377 +Train: [41] [ 300/6250] eta: 0:19:59 lr: 0.000086 grad: 0.2547 (0.2872) loss: 0.6680 (0.7231) time: 0.1713 data: 0.0735 max mem: 9377 +Train: [41] [ 400/6250] eta: 0:19:18 lr: 0.000086 grad: 0.2132 (0.2792) loss: 0.6857 (0.7159) time: 0.2032 data: 0.1064 max mem: 9377 +Train: [41] [ 500/6250] eta: 0:18:32 lr: 0.000086 grad: 0.1838 (0.2620) loss: 0.7141 (0.7126) time: 0.1888 data: 0.0890 max mem: 9377 +Train: [41] [ 600/6250] eta: 0:17:55 lr: 0.000086 grad: 0.1902 (0.2517) loss: 0.6972 (0.7102) time: 0.1865 data: 0.0857 max mem: 9377 +Train: [41] [ 700/6250] eta: 0:17:16 lr: 0.000085 grad: 0.2150 (0.2448) loss: 0.6819 (0.7070) time: 0.1693 data: 0.0820 max mem: 9377 +Train: [41] [ 800/6250] eta: 0:16:48 lr: 0.000085 grad: 0.1954 (0.2403) loss: 0.6888 (0.7055) time: 0.1819 data: 0.0807 max mem: 9377 +Train: [41] [ 900/6250] eta: 0:16:23 lr: 0.000085 grad: 0.1867 (0.2347) loss: 0.6903 (0.7037) time: 0.1943 data: 0.1082 max mem: 9377 +Train: [41] [1000/6250] eta: 0:15:46 lr: 0.000085 grad: 0.1767 (0.2297) loss: 0.7010 (0.7029) time: 0.1512 data: 0.0568 max mem: 9377 +Train: [41] [1100/6250] eta: 0:15:14 lr: 0.000085 grad: 0.1715 (0.2246) loss: 0.7027 (0.7025) time: 0.1559 data: 0.0639 max mem: 9377 +Train: [41] [1200/6250] eta: 0:14:45 lr: 0.000085 grad: 0.1802 (0.2204) loss: 0.6817 (0.7018) time: 0.1447 data: 0.0612 max mem: 9377 +Train: [41] [1300/6250] eta: 0:14:20 lr: 0.000085 grad: 0.1781 (0.2183) loss: 0.6898 (0.7013) time: 0.1524 data: 0.0616 max mem: 9377 +Train: [41] [1400/6250] eta: 0:13:59 lr: 0.000085 grad: 0.1633 (0.2153) loss: 0.7005 (0.7008) time: 0.1454 data: 0.0509 max mem: 9377 +Train: [41] [1500/6250] eta: 0:13:37 lr: 0.000085 grad: 0.1631 (0.2120) loss: 0.6963 (0.7002) time: 0.1689 data: 0.0872 max mem: 9377 +Train: [41] [1600/6250] eta: 0:13:22 lr: 0.000085 grad: 0.1610 (0.2091) loss: 0.6875 (0.6998) time: 0.2717 data: 0.1947 max mem: 9377 +Train: [41] [1700/6250] eta: 0:13:02 lr: 0.000085 grad: 0.1648 (0.2065) loss: 0.6833 (0.6993) time: 0.1498 data: 0.0691 max mem: 9377 +Train: [41] [1800/6250] eta: 0:12:48 lr: 0.000085 grad: 0.1634 (0.2042) loss: 0.6734 (0.6988) time: 0.1886 data: 0.1068 max mem: 9377 +Train: [41] [1900/6250] eta: 0:12:34 lr: 0.000085 grad: 0.1594 (0.2021) loss: 0.6854 (0.6983) time: 0.1997 data: 0.1167 max mem: 9377 +Train: [41] [2000/6250] eta: 0:12:18 lr: 0.000085 grad: 0.1605 (0.2001) loss: 0.6780 (0.6979) time: 0.1918 data: 0.0993 max mem: 9377 +Train: [41] [2100/6250] eta: 0:12:02 lr: 0.000085 grad: 0.1673 (0.1983) loss: 0.6786 (0.6975) time: 0.1982 data: 0.1073 max mem: 9377 +Train: [41] [2200/6250] eta: 0:11:46 lr: 0.000085 grad: 0.1656 (0.1967) loss: 0.6912 (0.6972) time: 0.1603 data: 0.0779 max mem: 9377 +Train: [41] [2300/6250] eta: 0:11:25 lr: 0.000085 grad: 0.1569 (0.1953) loss: 0.6854 (0.6968) time: 0.1535 data: 0.0615 max mem: 9377 +Train: [41] [2400/6250] eta: 0:11:04 lr: 0.000085 grad: 0.1567 (0.1939) loss: 0.6697 (0.6964) time: 0.1549 data: 0.0658 max mem: 9377 +Train: [41] [2500/6250] eta: 0:10:45 lr: 0.000085 grad: 0.1519 (0.1926) loss: 0.6953 (0.6964) time: 0.1549 data: 0.0641 max mem: 9377 +Train: [41] [2600/6250] eta: 0:10:26 lr: 0.000085 grad: 0.1561 (0.1913) loss: 0.7025 (0.6964) time: 0.1630 data: 0.0751 max mem: 9377 +Train: [41] [2700/6250] eta: 0:10:07 lr: 0.000085 grad: 0.1593 (0.1900) loss: 0.6876 (0.6964) time: 0.1576 data: 0.0694 max mem: 9377 +Train: [41] [2800/6250] eta: 0:09:50 lr: 0.000085 grad: 0.1586 (0.1889) loss: 0.6897 (0.6964) time: 0.1754 data: 0.0873 max mem: 9377 +Train: [41] [2900/6250] eta: 0:09:33 lr: 0.000085 grad: 0.1487 (0.1877) loss: 0.7143 (0.6966) time: 0.1545 data: 0.0529 max mem: 9377 +Train: [41] [3000/6250] eta: 0:09:15 lr: 0.000085 grad: 0.1580 (0.1866) loss: 0.6884 (0.6967) time: 0.1682 data: 0.0867 max mem: 9377 +Train: [41] [3100/6250] eta: 0:08:57 lr: 0.000085 grad: 0.1587 (0.1858) loss: 0.6857 (0.6967) time: 0.1726 data: 0.0882 max mem: 9377 +Train: [41] [3200/6250] eta: 0:08:39 lr: 0.000085 grad: 0.1541 (0.1850) loss: 0.7011 (0.6968) time: 0.1528 data: 0.0603 max mem: 9377 +Train: [41] [3300/6250] eta: 0:08:22 lr: 0.000085 grad: 0.1587 (0.1842) loss: 0.6832 (0.6967) time: 0.1527 data: 0.0578 max mem: 9377 +Train: [41] [3400/6250] eta: 0:08:04 lr: 0.000085 grad: 0.1593 (0.1837) loss: 0.6889 (0.6967) time: 0.1885 data: 0.0979 max mem: 9377 +Train: [41] [3500/6250] eta: 0:07:45 lr: 0.000085 grad: 0.1563 (0.1831) loss: 0.6985 (0.6965) time: 0.1385 data: 0.0425 max mem: 9377 +Train: [41] [3600/6250] eta: 0:07:27 lr: 0.000085 grad: 0.1573 (0.1825) loss: 0.6887 (0.6964) time: 0.1512 data: 0.0606 max mem: 9377 +Train: [41] [3700/6250] eta: 0:07:09 lr: 0.000085 grad: 0.1583 (0.1819) loss: 0.6810 (0.6962) time: 0.1187 data: 0.0248 max mem: 9377 +Train: [41] [3800/6250] eta: 0:06:51 lr: 0.000085 grad: 0.1628 (0.1815) loss: 0.6899 (0.6959) time: 0.1696 data: 0.0814 max mem: 9377 +Train: [41] [3900/6250] eta: 0:06:33 lr: 0.000084 grad: 0.1692 (0.1811) loss: 0.6804 (0.6956) time: 0.1648 data: 0.0789 max mem: 9377 +Train: [41] [4000/6250] eta: 0:06:16 lr: 0.000084 grad: 0.1650 (0.1807) loss: 0.6723 (0.6952) time: 0.1606 data: 0.0716 max mem: 9377 +Train: [41] [4100/6250] eta: 0:05:59 lr: 0.000084 grad: 0.1627 (0.1803) loss: 0.6760 (0.6949) time: 0.1435 data: 0.0611 max mem: 9377 +Train: [41] [4200/6250] eta: 0:05:41 lr: 0.000084 grad: 0.1621 (0.1800) loss: 0.6866 (0.6946) time: 0.1544 data: 0.0640 max mem: 9377 +Train: [41] [4300/6250] eta: 0:05:24 lr: 0.000084 grad: 0.1630 (0.1795) loss: 0.6771 (0.6945) time: 0.1592 data: 0.0687 max mem: 9377 +Train: [41] [4400/6250] eta: 0:05:07 lr: 0.000084 grad: 0.1572 (0.1791) loss: 0.7048 (0.6944) time: 0.1726 data: 0.0833 max mem: 9377 +Train: [41] [4500/6250] eta: 0:04:50 lr: 0.000084 grad: 0.1578 (0.1786) loss: 0.6765 (0.6942) time: 0.1582 data: 0.0792 max mem: 9377 +Train: [41] [4600/6250] eta: 0:04:34 lr: 0.000084 grad: 0.1603 (0.1783) loss: 0.6953 (0.6940) time: 0.2077 data: 0.1190 max mem: 9377 +Train: [41] [4700/6250] eta: 0:04:16 lr: 0.000084 grad: 0.1633 (0.1780) loss: 0.6909 (0.6937) time: 0.1559 data: 0.0660 max mem: 9377 +Train: [41] [4800/6250] eta: 0:03:59 lr: 0.000084 grad: 0.1561 (0.1776) loss: 0.6849 (0.6936) time: 0.1493 data: 0.0626 max mem: 9377 +Train: [41] [4900/6250] eta: 0:03:42 lr: 0.000084 grad: 0.1581 (0.1772) loss: 0.6981 (0.6935) time: 0.1553 data: 0.0648 max mem: 9377 +Train: [41] [5000/6250] eta: 0:03:26 lr: 0.000084 grad: 0.1547 (0.1768) loss: 0.6954 (0.6935) time: 0.1720 data: 0.0932 max mem: 9377 +Train: [41] [5100/6250] eta: 0:03:09 lr: 0.000084 grad: 0.1599 (0.1765) loss: 0.6810 (0.6935) time: 0.1714 data: 0.0844 max mem: 9377 +Train: [41] [5200/6250] eta: 0:02:52 lr: 0.000084 grad: 0.1555 (0.1762) loss: 0.6930 (0.6934) time: 0.1394 data: 0.0432 max mem: 9377 +Train: [41] [5300/6250] eta: 0:02:36 lr: 0.000084 grad: 0.1589 (0.1758) loss: 0.6969 (0.6934) time: 0.1910 data: 0.1134 max mem: 9377 +Train: [41] [5400/6250] eta: 0:02:19 lr: 0.000084 grad: 0.1592 (0.1755) loss: 0.6930 (0.6935) time: 0.1537 data: 0.0658 max mem: 9377 +Train: [41] [5500/6250] eta: 0:02:02 lr: 0.000084 grad: 0.1562 (0.1752) loss: 0.6944 (0.6935) time: 0.1660 data: 0.0750 max mem: 9377 +Train: [41] [5600/6250] eta: 0:01:46 lr: 0.000084 grad: 0.1519 (0.1749) loss: 0.6925 (0.6935) time: 0.1660 data: 0.0796 max mem: 9377 +Train: [41] [5700/6250] eta: 0:01:30 lr: 0.000084 grad: 0.1538 (0.1746) loss: 0.7018 (0.6936) time: 0.1626 data: 0.0732 max mem: 9377 +Train: [41] [5800/6250] eta: 0:01:13 lr: 0.000084 grad: 0.1600 (0.1744) loss: 0.6985 (0.6936) time: 0.1630 data: 0.0744 max mem: 9377 +Train: [41] [5900/6250] eta: 0:00:57 lr: 0.000084 grad: 0.1561 (0.1742) loss: 0.6910 (0.6937) time: 0.1590 data: 0.0667 max mem: 9377 +Train: [41] [6000/6250] eta: 0:00:40 lr: 0.000084 grad: 0.1512 (0.1740) loss: 0.7128 (0.6939) time: 0.1781 data: 0.0892 max mem: 9377 +Train: [41] [6100/6250] eta: 0:00:24 lr: 0.000084 grad: 0.1564 (0.1737) loss: 0.6866 (0.6939) time: 0.1596 data: 0.0713 max mem: 9377 +Train: [41] [6200/6250] eta: 0:00:08 lr: 0.000084 grad: 0.1556 (0.1735) loss: 0.6952 (0.6940) time: 0.1744 data: 0.0874 max mem: 9377 +Train: [41] [6249/6250] eta: 0:00:00 lr: 0.000084 grad: 0.1583 (0.1734) loss: 0.6870 (0.6940) time: 0.1606 data: 0.0791 max mem: 9377 +Train: [41] Total time: 0:17:08 (0.1645 s / it) +Averaged stats: lr: 0.000084 grad: 0.1583 (0.1734) loss: 0.6870 (0.6940) +Eval (hcp-train-subset): [41] [ 0/62] eta: 0:05:48 loss: 0.8781 (0.8781) time: 5.6185 data: 5.5871 max mem: 9377 +Eval (hcp-train-subset): [41] [61/62] eta: 0:00:00 loss: 0.8860 (0.8869) time: 0.1496 data: 0.1245 max mem: 9377 +Eval (hcp-train-subset): [41] Total time: 0:00:14 (0.2394 s / it) +Averaged stats (hcp-train-subset): loss: 0.8860 (0.8869) +Eval (hcp-val): [41] [ 0/62] eta: 0:04:47 loss: 0.8884 (0.8884) time: 4.6362 data: 4.5583 max mem: 9377 +Eval (hcp-val): [41] [61/62] eta: 0:00:00 loss: 0.8871 (0.8874) time: 0.1383 data: 0.1126 max mem: 9377 +Eval (hcp-val): [41] Total time: 0:00:15 (0.2479 s / it) +Averaged stats (hcp-val): loss: 0.8871 (0.8874) +Eval (nsd-val): [41] [ 0/62] eta: 0:06:07 loss: 0.8708 (0.8708) time: 5.9296 data: 5.8966 max mem: 9377 +Eval (nsd-val): [41] [61/62] eta: 0:00:00 loss: 0.8765 (0.8768) time: 0.1431 data: 0.1174 max mem: 9377 +Eval (nsd-val): [41] Total time: 0:00:14 (0.2414 s / it) +Averaged stats (nsd-val): loss: 0.8765 (0.8768) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [42] [ 0/6250] eta: 9:33:25 lr: 0.000084 grad: 0.1359 (0.1359) loss: 0.7940 (0.7940) time: 5.5048 data: 5.1369 max mem: 9377 +Train: [42] [ 100/6250] eta: 0:23:44 lr: 0.000084 grad: 0.2676 (0.2689) loss: 0.7045 (0.7357) time: 0.1845 data: 0.0748 max mem: 9377 +Train: [42] [ 200/6250] eta: 0:20:46 lr: 0.000084 grad: 0.2206 (0.2572) loss: 0.7037 (0.7196) time: 0.1671 data: 0.0640 max mem: 9377 +Train: [42] [ 300/6250] eta: 0:19:23 lr: 0.000084 grad: 0.2338 (0.2552) loss: 0.6851 (0.7115) time: 0.1871 data: 0.0852 max mem: 9377 +Train: [42] [ 400/6250] eta: 0:18:28 lr: 0.000084 grad: 0.2254 (0.2494) loss: 0.6883 (0.7075) time: 0.1681 data: 0.0587 max mem: 9377 +Train: [42] [ 500/6250] eta: 0:17:44 lr: 0.000084 grad: 0.1950 (0.2406) loss: 0.6659 (0.7030) time: 0.1498 data: 0.0603 max mem: 9377 +Train: [42] [ 600/6250] eta: 0:17:08 lr: 0.000084 grad: 0.2093 (0.2336) loss: 0.6886 (0.6994) time: 0.1636 data: 0.0684 max mem: 9377 +Train: [42] [ 700/6250] eta: 0:16:35 lr: 0.000084 grad: 0.1826 (0.2289) loss: 0.6735 (0.6965) time: 0.1578 data: 0.0726 max mem: 9377 +Train: [42] [ 800/6250] eta: 0:16:08 lr: 0.000084 grad: 0.1729 (0.2230) loss: 0.6776 (0.6946) time: 0.1611 data: 0.0667 max mem: 9377 +Train: [42] [ 900/6250] eta: 0:15:43 lr: 0.000083 grad: 0.1668 (0.2170) loss: 0.6790 (0.6941) time: 0.1491 data: 0.0589 max mem: 9377 +Train: [42] [1000/6250] eta: 0:15:15 lr: 0.000083 grad: 0.1620 (0.2120) loss: 0.6890 (0.6938) time: 0.1645 data: 0.0724 max mem: 9377 +Train: [42] [1100/6250] eta: 0:14:50 lr: 0.000083 grad: 0.1635 (0.2077) loss: 0.6999 (0.6936) time: 0.1579 data: 0.0691 max mem: 9377 +Train: [42] [1200/6250] eta: 0:14:20 lr: 0.000083 grad: 0.1582 (0.2036) loss: 0.6925 (0.6934) time: 0.1482 data: 0.0506 max mem: 9377 +Train: [42] [1300/6250] eta: 0:13:54 lr: 0.000083 grad: 0.1593 (0.2003) loss: 0.7008 (0.6929) time: 0.1523 data: 0.0684 max mem: 9377 +Train: [42] [1400/6250] eta: 0:13:30 lr: 0.000083 grad: 0.1595 (0.1972) loss: 0.6904 (0.6927) time: 0.1320 data: 0.0408 max mem: 9377 +Train: [42] [1500/6250] eta: 0:13:09 lr: 0.000083 grad: 0.1588 (0.1950) loss: 0.6768 (0.6922) time: 0.1583 data: 0.0684 max mem: 9377 +Train: [42] [1600/6250] eta: 0:12:51 lr: 0.000083 grad: 0.1565 (0.1929) loss: 0.6798 (0.6921) time: 0.1504 data: 0.0624 max mem: 9377 +Train: [42] [1700/6250] eta: 0:12:38 lr: 0.000083 grad: 0.1599 (0.1910) loss: 0.6855 (0.6922) time: 0.1628 data: 0.0815 max mem: 9377 +Train: [42] [1800/6250] eta: 0:12:25 lr: 0.000083 grad: 0.1539 (0.1893) loss: 0.6857 (0.6920) time: 0.1653 data: 0.0846 max mem: 9377 +Train: [42] [1900/6250] eta: 0:12:09 lr: 0.000083 grad: 0.1627 (0.1881) loss: 0.6725 (0.6916) time: 0.1516 data: 0.0583 max mem: 9377 +Train: [42] [2000/6250] eta: 0:11:56 lr: 0.000083 grad: 0.1674 (0.1869) loss: 0.6866 (0.6914) time: 0.2068 data: 0.1146 max mem: 9377 +Train: [42] [2100/6250] eta: 0:11:40 lr: 0.000083 grad: 0.1728 (0.1859) loss: 0.6717 (0.6911) time: 0.1782 data: 0.0890 max mem: 9377 +Train: [42] [2200/6250] eta: 0:11:23 lr: 0.000083 grad: 0.1640 (0.1850) loss: 0.6727 (0.6906) time: 0.1678 data: 0.0803 max mem: 9377 +Train: [42] [2300/6250] eta: 0:11:04 lr: 0.000083 grad: 0.1671 (0.1842) loss: 0.6825 (0.6901) time: 0.1466 data: 0.0466 max mem: 9377 +Train: [42] [2400/6250] eta: 0:10:46 lr: 0.000083 grad: 0.1527 (0.1834) loss: 0.6835 (0.6901) time: 0.1734 data: 0.0825 max mem: 9377 +Train: [42] [2500/6250] eta: 0:10:27 lr: 0.000083 grad: 0.1684 (0.1827) loss: 0.6756 (0.6898) time: 0.1673 data: 0.0775 max mem: 9377 +Train: [42] [2600/6250] eta: 0:10:07 lr: 0.000083 grad: 0.1596 (0.1821) loss: 0.6722 (0.6896) time: 0.1424 data: 0.0465 max mem: 9377 +Train: [42] [2700/6250] eta: 0:09:48 lr: 0.000083 grad: 0.1603 (0.1813) loss: 0.6759 (0.6896) time: 0.1233 data: 0.0344 max mem: 9377 +Train: [42] [2800/6250] eta: 0:09:31 lr: 0.000083 grad: 0.1616 (0.1808) loss: 0.6926 (0.6894) time: 0.1311 data: 0.0375 max mem: 9377 +Train: [42] [2900/6250] eta: 0:09:14 lr: 0.000083 grad: 0.1615 (0.1802) loss: 0.6665 (0.6893) time: 0.1622 data: 0.0660 max mem: 9377 +Train: [42] [3000/6250] eta: 0:08:57 lr: 0.000083 grad: 0.1652 (0.1797) loss: 0.6751 (0.6892) time: 0.1568 data: 0.0677 max mem: 9377 +Train: [42] [3100/6250] eta: 0:08:40 lr: 0.000083 grad: 0.1611 (0.1791) loss: 0.6891 (0.6892) time: 0.1578 data: 0.0770 max mem: 9377 +Train: [42] [3200/6250] eta: 0:08:25 lr: 0.000083 grad: 0.1604 (0.1785) loss: 0.6928 (0.6894) time: 0.1775 data: 0.0869 max mem: 9377 +Train: [42] [3300/6250] eta: 0:08:09 lr: 0.000083 grad: 0.1711 (0.1782) loss: 0.6738 (0.6894) time: 0.1633 data: 0.0609 max mem: 9377 +Train: [42] [3400/6250] eta: 0:07:53 lr: 0.000083 grad: 0.1635 (0.1778) loss: 0.6899 (0.6893) time: 0.1685 data: 0.0810 max mem: 9377 +Train: [42] [3500/6250] eta: 0:07:36 lr: 0.000083 grad: 0.1611 (0.1774) loss: 0.6894 (0.6893) time: 0.1614 data: 0.0824 max mem: 9377 +Train: [42] [3600/6250] eta: 0:07:19 lr: 0.000083 grad: 0.1585 (0.1770) loss: 0.6909 (0.6893) time: 0.1499 data: 0.0602 max mem: 9377 +Train: [42] [3700/6250] eta: 0:07:02 lr: 0.000083 grad: 0.1583 (0.1765) loss: 0.6886 (0.6893) time: 0.1609 data: 0.0665 max mem: 9377 +Train: [42] [3800/6250] eta: 0:06:46 lr: 0.000083 grad: 0.1597 (0.1762) loss: 0.6823 (0.6893) time: 0.1768 data: 0.0933 max mem: 9377 +Train: [42] [3900/6250] eta: 0:06:29 lr: 0.000083 grad: 0.1608 (0.1758) loss: 0.7001 (0.6892) time: 0.1644 data: 0.0781 max mem: 9377 +Train: [42] [4000/6250] eta: 0:06:12 lr: 0.000083 grad: 0.1621 (0.1754) loss: 0.7029 (0.6895) time: 0.1853 data: 0.1014 max mem: 9377 +Train: [42] [4100/6250] eta: 0:05:55 lr: 0.000082 grad: 0.1609 (0.1750) loss: 0.7117 (0.6898) time: 0.1637 data: 0.0756 max mem: 9377 +Train: [42] [4200/6250] eta: 0:05:39 lr: 0.000082 grad: 0.1590 (0.1748) loss: 0.7013 (0.6900) time: 0.1646 data: 0.0884 max mem: 9377 +Train: [42] [4300/6250] eta: 0:05:23 lr: 0.000082 grad: 0.1558 (0.1744) loss: 0.7065 (0.6903) time: 0.2013 data: 0.1176 max mem: 9377 +Train: [42] [4400/6250] eta: 0:05:06 lr: 0.000082 grad: 0.1567 (0.1742) loss: 0.6939 (0.6904) time: 0.1723 data: 0.0807 max mem: 9377 +Train: [42] [4500/6250] eta: 0:04:49 lr: 0.000082 grad: 0.1551 (0.1739) loss: 0.7033 (0.6905) time: 0.1246 data: 0.0374 max mem: 9377 +Train: [42] [4600/6250] eta: 0:04:32 lr: 0.000082 grad: 0.1564 (0.1736) loss: 0.6998 (0.6907) time: 0.1479 data: 0.0619 max mem: 9377 +Train: [42] [4700/6250] eta: 0:04:15 lr: 0.000082 grad: 0.1625 (0.1734) loss: 0.6935 (0.6908) time: 0.1506 data: 0.0638 max mem: 9377 +Train: [42] [4800/6250] eta: 0:03:58 lr: 0.000082 grad: 0.1688 (0.1732) loss: 0.6910 (0.6909) time: 0.1662 data: 0.0824 max mem: 9377 +Train: [42] [4900/6250] eta: 0:03:42 lr: 0.000082 grad: 0.1572 (0.1729) loss: 0.6845 (0.6909) time: 0.1959 data: 0.1091 max mem: 9377 +Train: [42] [5000/6250] eta: 0:03:25 lr: 0.000082 grad: 0.1543 (0.1727) loss: 0.6956 (0.6909) time: 0.1631 data: 0.0843 max mem: 9377 +Train: [42] [5100/6250] eta: 0:03:09 lr: 0.000082 grad: 0.1564 (0.1725) loss: 0.6879 (0.6909) time: 0.1619 data: 0.0737 max mem: 9377 +Train: [42] [5200/6250] eta: 0:02:52 lr: 0.000082 grad: 0.1581 (0.1722) loss: 0.6987 (0.6910) time: 0.1442 data: 0.0571 max mem: 9377 +Train: [42] [5300/6250] eta: 0:02:36 lr: 0.000082 grad: 0.1555 (0.1720) loss: 0.6765 (0.6910) time: 0.1396 data: 0.0484 max mem: 9377 +Train: [42] [5400/6250] eta: 0:02:19 lr: 0.000082 grad: 0.1593 (0.1718) loss: 0.6846 (0.6909) time: 0.1585 data: 0.0664 max mem: 9377 +Train: [42] [5500/6250] eta: 0:02:03 lr: 0.000082 grad: 0.1597 (0.1717) loss: 0.6892 (0.6909) time: 0.1510 data: 0.0632 max mem: 9377 +Train: [42] [5600/6250] eta: 0:01:46 lr: 0.000082 grad: 0.1650 (0.1715) loss: 0.6813 (0.6908) time: 0.1448 data: 0.0466 max mem: 9377 +Train: [42] [5700/6250] eta: 0:01:30 lr: 0.000082 grad: 0.1588 (0.1713) loss: 0.6902 (0.6908) time: 0.1613 data: 0.0790 max mem: 9377 +Train: [42] [5800/6250] eta: 0:01:13 lr: 0.000082 grad: 0.1617 (0.1712) loss: 0.6719 (0.6907) time: 0.1465 data: 0.0587 max mem: 9377 +Train: [42] [5900/6250] eta: 0:00:57 lr: 0.000082 grad: 0.1541 (0.1710) loss: 0.7023 (0.6907) time: 0.1566 data: 0.0638 max mem: 9377 +Train: [42] [6000/6250] eta: 0:00:40 lr: 0.000082 grad: 0.1590 (0.1708) loss: 0.6948 (0.6907) time: 0.1431 data: 0.0540 max mem: 9377 +Train: [42] [6100/6250] eta: 0:00:24 lr: 0.000082 grad: 0.1603 (0.1707) loss: 0.6916 (0.6907) time: 0.1582 data: 0.0716 max mem: 9377 +Train: [42] [6200/6250] eta: 0:00:08 lr: 0.000082 grad: 0.1591 (0.1705) loss: 0.6917 (0.6907) time: 0.1628 data: 0.0702 max mem: 9377 +Train: [42] [6249/6250] eta: 0:00:00 lr: 0.000082 grad: 0.1619 (0.1705) loss: 0.6866 (0.6906) time: 0.1598 data: 0.0786 max mem: 9377 +Train: [42] Total time: 0:17:07 (0.1643 s / it) +Averaged stats: lr: 0.000082 grad: 0.1619 (0.1705) loss: 0.6866 (0.6906) +Eval (hcp-train-subset): [42] [ 0/62] eta: 0:03:55 loss: 0.8834 (0.8834) time: 3.7993 data: 3.7207 max mem: 9377 +Eval (hcp-train-subset): [42] [61/62] eta: 0:00:00 loss: 0.8886 (0.8898) time: 0.1472 data: 0.1221 max mem: 9377 +Eval (hcp-train-subset): [42] Total time: 0:00:14 (0.2409 s / it) +Averaged stats (hcp-train-subset): loss: 0.8886 (0.8898) +Eval (hcp-val): [42] [ 0/62] eta: 0:04:53 loss: 0.8901 (0.8901) time: 4.7336 data: 4.6431 max mem: 9377 +Eval (hcp-val): [42] [61/62] eta: 0:00:00 loss: 0.8893 (0.8897) time: 0.1452 data: 0.1159 max mem: 9377 +Eval (hcp-val): [42] Total time: 0:00:15 (0.2505 s / it) +Averaged stats (hcp-val): loss: 0.8893 (0.8897) +Eval (nsd-val): [42] [ 0/62] eta: 0:03:38 loss: 0.8729 (0.8729) time: 3.5186 data: 3.4440 max mem: 9377 +Eval (nsd-val): [42] [61/62] eta: 0:00:00 loss: 0.8748 (0.8783) time: 0.1639 data: 0.1378 max mem: 9377 +Eval (nsd-val): [42] Total time: 0:00:14 (0.2406 s / it) +Averaged stats (nsd-val): loss: 0.8748 (0.8783) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [43] [ 0/6250] eta: 12:19:28 lr: 0.000082 grad: 0.4517 (0.4517) loss: 0.6116 (0.6116) time: 7.0990 data: 6.9408 max mem: 9377 +Train: [43] [ 100/6250] eta: 0:22:29 lr: 0.000082 grad: 0.3423 (0.3382) loss: 0.6951 (0.7181) time: 0.1707 data: 0.0745 max mem: 9377 +Train: [43] [ 200/6250] eta: 0:19:27 lr: 0.000082 grad: 0.2679 (0.3121) loss: 0.7185 (0.7140) time: 0.1616 data: 0.0758 max mem: 9377 +Train: [43] [ 300/6250] eta: 0:18:01 lr: 0.000082 grad: 0.2767 (0.2943) loss: 0.6931 (0.7090) time: 0.1629 data: 0.0639 max mem: 9377 +Train: [43] [ 400/6250] eta: 0:17:10 lr: 0.000082 grad: 0.2428 (0.2852) loss: 0.6753 (0.7040) time: 0.1619 data: 0.0736 max mem: 9377 +Train: [43] [ 500/6250] eta: 0:16:49 lr: 0.000082 grad: 0.2065 (0.2714) loss: 0.6690 (0.7003) time: 0.1973 data: 0.1112 max mem: 9377 +Train: [43] [ 600/6250] eta: 0:16:20 lr: 0.000082 grad: 0.1756 (0.2570) loss: 0.6854 (0.6977) time: 0.1612 data: 0.0654 max mem: 9377 +Train: [43] [ 700/6250] eta: 0:15:56 lr: 0.000082 grad: 0.1748 (0.2466) loss: 0.6913 (0.6956) time: 0.1746 data: 0.0774 max mem: 9377 +Train: [43] [ 800/6250] eta: 0:15:26 lr: 0.000082 grad: 0.1699 (0.2376) loss: 0.6898 (0.6950) time: 0.1779 data: 0.0870 max mem: 9377 +Train: [43] [ 900/6250] eta: 0:15:07 lr: 0.000082 grad: 0.1602 (0.2295) loss: 0.7009 (0.6954) time: 0.1423 data: 0.0549 max mem: 9377 +Train: [43] [1000/6250] eta: 0:14:48 lr: 0.000081 grad: 0.1681 (0.2234) loss: 0.6962 (0.6956) time: 0.1636 data: 0.0737 max mem: 9377 +Train: [43] [1100/6250] eta: 0:14:30 lr: 0.000081 grad: 0.1672 (0.2184) loss: 0.7012 (0.6959) time: 0.1611 data: 0.0566 max mem: 9377 +Train: [43] [1200/6250] eta: 0:14:10 lr: 0.000081 grad: 0.1615 (0.2141) loss: 0.6920 (0.6959) time: 0.1599 data: 0.0555 max mem: 9377 +Train: [43] [1300/6250] eta: 0:13:50 lr: 0.000081 grad: 0.1639 (0.2101) loss: 0.6949 (0.6961) time: 0.1646 data: 0.0684 max mem: 9377 +Train: [43] [1400/6250] eta: 0:13:28 lr: 0.000081 grad: 0.1610 (0.2067) loss: 0.6814 (0.6956) time: 0.1463 data: 0.0574 max mem: 9377 +Train: [43] [1500/6250] eta: 0:13:10 lr: 0.000081 grad: 0.1611 (0.2041) loss: 0.6800 (0.6949) time: 0.1710 data: 0.0853 max mem: 9377 +Train: [43] [1600/6250] eta: 0:12:49 lr: 0.000081 grad: 0.1771 (0.2021) loss: 0.6771 (0.6941) time: 0.1529 data: 0.0618 max mem: 9377 +Train: [43] [1700/6250] eta: 0:12:34 lr: 0.000081 grad: 0.1606 (0.2000) loss: 0.6925 (0.6937) time: 0.1562 data: 0.0713 max mem: 9377 +Train: [43] [1800/6250] eta: 0:12:17 lr: 0.000081 grad: 0.1630 (0.1978) loss: 0.6942 (0.6937) time: 0.1645 data: 0.0735 max mem: 9377 +Train: [43] [1900/6250] eta: 0:12:01 lr: 0.000081 grad: 0.1570 (0.1962) loss: 0.6935 (0.6935) time: 0.1670 data: 0.0833 max mem: 9377 +Train: [43] [2000/6250] eta: 0:11:43 lr: 0.000081 grad: 0.1590 (0.1945) loss: 0.6852 (0.6930) time: 0.1753 data: 0.0822 max mem: 9377 +Train: [43] [2100/6250] eta: 0:11:26 lr: 0.000081 grad: 0.1658 (0.1931) loss: 0.6890 (0.6929) time: 0.1772 data: 0.0967 max mem: 9377 +Train: [43] [2200/6250] eta: 0:11:09 lr: 0.000081 grad: 0.1673 (0.1919) loss: 0.6912 (0.6927) time: 0.1751 data: 0.0894 max mem: 9377 +Train: [43] [2300/6250] eta: 0:10:51 lr: 0.000081 grad: 0.1570 (0.1906) loss: 0.6928 (0.6927) time: 0.1698 data: 0.0776 max mem: 9377 +Train: [43] [2400/6250] eta: 0:10:32 lr: 0.000081 grad: 0.1683 (0.1896) loss: 0.6770 (0.6926) time: 0.1468 data: 0.0524 max mem: 9377 +Train: [43] [2500/6250] eta: 0:10:13 lr: 0.000081 grad: 0.1648 (0.1887) loss: 0.6841 (0.6923) time: 0.1610 data: 0.0669 max mem: 9377 +Train: [43] [2600/6250] eta: 0:09:56 lr: 0.000081 grad: 0.1623 (0.1879) loss: 0.6917 (0.6922) time: 0.1611 data: 0.0805 max mem: 9377 +Train: [43] [2700/6250] eta: 0:09:39 lr: 0.000081 grad: 0.1671 (0.1872) loss: 0.6869 (0.6920) time: 0.1812 data: 0.0922 max mem: 9377 +Train: [43] [2800/6250] eta: 0:09:21 lr: 0.000081 grad: 0.1592 (0.1864) loss: 0.6949 (0.6919) time: 0.1449 data: 0.0574 max mem: 9377 +Train: [43] [2900/6250] eta: 0:09:06 lr: 0.000081 grad: 0.1604 (0.1856) loss: 0.6951 (0.6918) time: 0.2087 data: 0.1297 max mem: 9377 +Train: [43] [3000/6250] eta: 0:08:50 lr: 0.000081 grad: 0.1567 (0.1849) loss: 0.7114 (0.6920) time: 0.1666 data: 0.0853 max mem: 9377 +Train: [43] [3100/6250] eta: 0:08:33 lr: 0.000081 grad: 0.1582 (0.1842) loss: 0.6935 (0.6921) time: 0.1506 data: 0.0675 max mem: 9377 +Train: [43] [3200/6250] eta: 0:08:17 lr: 0.000081 grad: 0.1585 (0.1834) loss: 0.7028 (0.6922) time: 0.1826 data: 0.1111 max mem: 9377 +Train: [43] [3300/6250] eta: 0:08:01 lr: 0.000081 grad: 0.1534 (0.1827) loss: 0.7000 (0.6926) time: 0.1703 data: 0.0872 max mem: 9377 +Train: [43] [3400/6250] eta: 0:07:44 lr: 0.000081 grad: 0.1546 (0.1819) loss: 0.6944 (0.6927) time: 0.1639 data: 0.0698 max mem: 9377 +Train: [43] [3500/6250] eta: 0:07:28 lr: 0.000081 grad: 0.1534 (0.1813) loss: 0.6975 (0.6930) time: 0.1708 data: 0.0852 max mem: 9377 +Train: [43] [3600/6250] eta: 0:07:11 lr: 0.000081 grad: 0.1585 (0.1807) loss: 0.6959 (0.6931) time: 0.1553 data: 0.0684 max mem: 9377 +Train: [43] [3700/6250] eta: 0:06:54 lr: 0.000081 grad: 0.1593 (0.1802) loss: 0.6945 (0.6932) time: 0.1350 data: 0.0402 max mem: 9377 +Train: [43] [3800/6250] eta: 0:06:36 lr: 0.000081 grad: 0.1629 (0.1797) loss: 0.7023 (0.6934) time: 0.1398 data: 0.0386 max mem: 9377 +Train: [43] [3900/6250] eta: 0:06:19 lr: 0.000081 grad: 0.1536 (0.1792) loss: 0.7131 (0.6936) time: 0.1335 data: 0.0372 max mem: 9377 +Train: [43] [4000/6250] eta: 0:06:02 lr: 0.000081 grad: 0.1532 (0.1787) loss: 0.7181 (0.6938) time: 0.1469 data: 0.0508 max mem: 9377 +Train: [43] [4100/6250] eta: 0:05:46 lr: 0.000081 grad: 0.1533 (0.1784) loss: 0.7069 (0.6940) time: 0.1302 data: 0.0449 max mem: 9377 +Train: [43] [4200/6250] eta: 0:05:30 lr: 0.000080 grad: 0.1548 (0.1779) loss: 0.7140 (0.6943) time: 0.1417 data: 0.0443 max mem: 9377 +Train: [43] [4300/6250] eta: 0:05:14 lr: 0.000080 grad: 0.1592 (0.1774) loss: 0.7025 (0.6945) time: 0.1445 data: 0.0551 max mem: 9377 +Train: [43] [4400/6250] eta: 0:04:58 lr: 0.000080 grad: 0.1585 (0.1770) loss: 0.7086 (0.6948) time: 0.1794 data: 0.0941 max mem: 9377 +Train: [43] [4500/6250] eta: 0:04:41 lr: 0.000080 grad: 0.1552 (0.1766) loss: 0.6986 (0.6949) time: 0.1613 data: 0.0767 max mem: 9377 +Train: [43] [4600/6250] eta: 0:04:25 lr: 0.000080 grad: 0.1562 (0.1762) loss: 0.6950 (0.6950) time: 0.1379 data: 0.0489 max mem: 9377 +Train: [43] [4700/6250] eta: 0:04:09 lr: 0.000080 grad: 0.1617 (0.1760) loss: 0.6965 (0.6950) time: 0.1494 data: 0.0615 max mem: 9377 +Train: [43] [4800/6250] eta: 0:03:53 lr: 0.000080 grad: 0.1607 (0.1758) loss: 0.6922 (0.6949) time: 0.1574 data: 0.0622 max mem: 9377 +Train: [43] [4900/6250] eta: 0:03:36 lr: 0.000080 grad: 0.1605 (0.1755) loss: 0.7054 (0.6949) time: 0.1381 data: 0.0506 max mem: 9377 +Train: [43] [5000/6250] eta: 0:03:20 lr: 0.000080 grad: 0.1695 (0.1752) loss: 0.6766 (0.6948) time: 0.1800 data: 0.0964 max mem: 9377 +Train: [43] [5100/6250] eta: 0:03:04 lr: 0.000080 grad: 0.1526 (0.1750) loss: 0.6908 (0.6947) time: 0.1601 data: 0.0733 max mem: 9377 +Train: [43] [5200/6250] eta: 0:02:48 lr: 0.000080 grad: 0.1586 (0.1747) loss: 0.6837 (0.6945) time: 0.1558 data: 0.0602 max mem: 9377 +Train: [43] [5300/6250] eta: 0:02:32 lr: 0.000080 grad: 0.1612 (0.1745) loss: 0.6776 (0.6942) time: 0.1449 data: 0.0547 max mem: 9377 +Train: [43] [5400/6250] eta: 0:02:16 lr: 0.000080 grad: 0.1587 (0.1742) loss: 0.6803 (0.6940) time: 0.1569 data: 0.0737 max mem: 9377 +Train: [43] [5500/6250] eta: 0:02:00 lr: 0.000080 grad: 0.1619 (0.1741) loss: 0.6929 (0.6937) time: 0.1608 data: 0.0677 max mem: 9377 +Train: [43] [5600/6250] eta: 0:01:44 lr: 0.000080 grad: 0.1642 (0.1738) loss: 0.6823 (0.6935) time: 0.1514 data: 0.0630 max mem: 9377 +Train: [43] [5700/6250] eta: 0:01:27 lr: 0.000080 grad: 0.1629 (0.1737) loss: 0.6853 (0.6933) time: 0.1740 data: 0.0855 max mem: 9377 +Train: [43] [5800/6250] eta: 0:01:11 lr: 0.000080 grad: 0.1579 (0.1734) loss: 0.7021 (0.6933) time: 0.1529 data: 0.0666 max mem: 9377 +Train: [43] [5900/6250] eta: 0:00:55 lr: 0.000080 grad: 0.1568 (0.1732) loss: 0.6888 (0.6932) time: 0.1651 data: 0.0820 max mem: 9377 +Train: [43] [6000/6250] eta: 0:00:39 lr: 0.000080 grad: 0.1653 (0.1730) loss: 0.6864 (0.6931) time: 0.1562 data: 0.0627 max mem: 9377 +Train: [43] [6100/6250] eta: 0:00:23 lr: 0.000080 grad: 0.1564 (0.1728) loss: 0.6973 (0.6930) time: 0.1865 data: 0.1034 max mem: 9377 +Train: [43] [6200/6250] eta: 0:00:07 lr: 0.000080 grad: 0.1616 (0.1727) loss: 0.6826 (0.6930) time: 0.1249 data: 0.0307 max mem: 9377 +Train: [43] [6249/6250] eta: 0:00:00 lr: 0.000080 grad: 0.1687 (0.1726) loss: 0.6982 (0.6929) time: 0.1665 data: 0.0793 max mem: 9377 +Train: [43] Total time: 0:16:44 (0.1608 s / it) +Averaged stats: lr: 0.000080 grad: 0.1687 (0.1726) loss: 0.6982 (0.6929) +Eval (hcp-train-subset): [43] [ 0/62] eta: 0:05:48 loss: 0.8834 (0.8834) time: 5.6185 data: 5.5721 max mem: 9377 +Eval (hcp-train-subset): [43] [61/62] eta: 0:00:00 loss: 0.8878 (0.8902) time: 0.1283 data: 0.1035 max mem: 9377 +Eval (hcp-train-subset): [43] Total time: 0:00:14 (0.2379 s / it) +Averaged stats (hcp-train-subset): loss: 0.8878 (0.8902) +Eval (hcp-val): [43] [ 0/62] eta: 0:03:41 loss: 0.8876 (0.8876) time: 3.5779 data: 3.5012 max mem: 9377 +Eval (hcp-val): [43] [61/62] eta: 0:00:00 loss: 0.8844 (0.8886) time: 0.1348 data: 0.1078 max mem: 9377 +Eval (hcp-val): [43] Total time: 0:00:14 (0.2369 s / it) +Averaged stats (hcp-val): loss: 0.8844 (0.8886) +Eval (nsd-val): [43] [ 0/62] eta: 0:05:13 loss: 0.8594 (0.8594) time: 5.0541 data: 5.0230 max mem: 9377 +Eval (nsd-val): [43] [61/62] eta: 0:00:00 loss: 0.8733 (0.8745) time: 0.1480 data: 0.1228 max mem: 9377 +Eval (nsd-val): [43] Total time: 0:00:15 (0.2426 s / it) +Averaged stats (nsd-val): loss: 0.8733 (0.8745) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [44] [ 0/6250] eta: 9:50:15 lr: 0.000080 grad: 0.1685 (0.1685) loss: 0.7967 (0.7967) time: 5.6665 data: 5.3471 max mem: 9377 +Train: [44] [ 100/6250] eta: 0:23:23 lr: 0.000080 grad: 0.3127 (0.3152) loss: 0.7256 (0.7264) time: 0.2021 data: 0.1048 max mem: 9377 +Train: [44] [ 200/6250] eta: 0:19:58 lr: 0.000080 grad: 0.3008 (0.3399) loss: 0.6834 (0.7111) time: 0.1231 data: 0.0152 max mem: 9377 +Train: [44] [ 300/6250] eta: 0:18:53 lr: 0.000080 grad: 0.2485 (0.3185) loss: 0.6835 (0.7020) time: 0.1969 data: 0.1027 max mem: 9377 +Train: [44] [ 400/6250] eta: 0:18:03 lr: 0.000080 grad: 0.2592 (0.3059) loss: 0.6845 (0.6996) time: 0.1593 data: 0.0624 max mem: 9377 +Train: [44] [ 500/6250] eta: 0:17:51 lr: 0.000080 grad: 0.2547 (0.2985) loss: 0.6813 (0.6974) time: 0.1800 data: 0.0935 max mem: 9377 +Train: [44] [ 600/6250] eta: 0:17:26 lr: 0.000080 grad: 0.2215 (0.2883) loss: 0.6690 (0.6944) time: 0.1931 data: 0.0851 max mem: 9377 +Train: [44] [ 700/6250] eta: 0:16:51 lr: 0.000080 grad: 0.2031 (0.2764) loss: 0.6886 (0.6926) time: 0.1682 data: 0.0716 max mem: 9377 +Train: [44] [ 800/6250] eta: 0:16:13 lr: 0.000080 grad: 0.2077 (0.2694) loss: 0.6967 (0.6917) time: 0.1736 data: 0.0791 max mem: 9377 +Train: [44] [ 900/6250] eta: 0:15:51 lr: 0.000080 grad: 0.1639 (0.2596) loss: 0.6953 (0.6923) time: 0.1593 data: 0.0580 max mem: 9377 +Train: [44] [1000/6250] eta: 0:15:20 lr: 0.000080 grad: 0.1587 (0.2507) loss: 0.7051 (0.6927) time: 0.1509 data: 0.0627 max mem: 9377 +Train: [44] [1100/6250] eta: 0:14:53 lr: 0.000079 grad: 0.1603 (0.2429) loss: 0.7004 (0.6931) time: 0.1567 data: 0.0562 max mem: 9377 +Train: [44] [1200/6250] eta: 0:14:24 lr: 0.000079 grad: 0.1676 (0.2365) loss: 0.6864 (0.6933) time: 0.1462 data: 0.0463 max mem: 9377 +Train: [44] [1300/6250] eta: 0:14:00 lr: 0.000079 grad: 0.1489 (0.2306) loss: 0.7078 (0.6936) time: 0.1504 data: 0.0594 max mem: 9377 +Train: [44] [1400/6250] eta: 0:13:40 lr: 0.000079 grad: 0.1537 (0.2255) loss: 0.7063 (0.6936) time: 0.1653 data: 0.0857 max mem: 9377 +Train: [44] [1500/6250] eta: 0:13:25 lr: 0.000079 grad: 0.1580 (0.2212) loss: 0.6919 (0.6938) time: 0.1907 data: 0.1093 max mem: 9377 +Train: [44] [1600/6250] eta: 0:13:04 lr: 0.000079 grad: 0.1538 (0.2174) loss: 0.7053 (0.6939) time: 0.1653 data: 0.0801 max mem: 9377 +Train: [44] [1700/6250] eta: 0:12:54 lr: 0.000079 grad: 0.1591 (0.2141) loss: 0.6914 (0.6940) time: 0.2089 data: 0.1227 max mem: 9377 +Train: [44] [1800/6250] eta: 0:12:38 lr: 0.000079 grad: 0.1609 (0.2110) loss: 0.6957 (0.6944) time: 0.1854 data: 0.0999 max mem: 9377 +Train: [44] [1900/6250] eta: 0:12:23 lr: 0.000079 grad: 0.1576 (0.2085) loss: 0.6852 (0.6941) time: 0.1969 data: 0.1192 max mem: 9377 +Train: [44] [2000/6250] eta: 0:12:06 lr: 0.000079 grad: 0.1583 (0.2063) loss: 0.6881 (0.6941) time: 0.1970 data: 0.1183 max mem: 9377 +Train: [44] [2100/6250] eta: 0:11:52 lr: 0.000079 grad: 0.1562 (0.2045) loss: 0.7050 (0.6938) time: 0.1738 data: 0.0707 max mem: 9377 +Train: [44] [2200/6250] eta: 0:11:33 lr: 0.000079 grad: 0.1631 (0.2027) loss: 0.6910 (0.6936) time: 0.1380 data: 0.0425 max mem: 9377 +Train: [44] [2300/6250] eta: 0:11:13 lr: 0.000079 grad: 0.1642 (0.2011) loss: 0.6834 (0.6934) time: 0.1639 data: 0.0707 max mem: 9377 +Train: [44] [2400/6250] eta: 0:10:54 lr: 0.000079 grad: 0.1641 (0.1995) loss: 0.6760 (0.6934) time: 0.1312 data: 0.0385 max mem: 9377 +Train: [44] [2500/6250] eta: 0:10:34 lr: 0.000079 grad: 0.1611 (0.1980) loss: 0.6866 (0.6932) time: 0.1472 data: 0.0553 max mem: 9377 +Train: [44] [2600/6250] eta: 0:10:15 lr: 0.000079 grad: 0.1544 (0.1966) loss: 0.6987 (0.6930) time: 0.1654 data: 0.0747 max mem: 9377 +Train: [44] [2700/6250] eta: 0:09:57 lr: 0.000079 grad: 0.1513 (0.1952) loss: 0.7124 (0.6932) time: 0.1565 data: 0.0689 max mem: 9377 +Train: [44] [2800/6250] eta: 0:09:38 lr: 0.000079 grad: 0.1587 (0.1939) loss: 0.6821 (0.6932) time: 0.1652 data: 0.0708 max mem: 9377 +Train: [44] [2900/6250] eta: 0:09:20 lr: 0.000079 grad: 0.1538 (0.1928) loss: 0.6958 (0.6934) time: 0.1936 data: 0.1098 max mem: 9377 +Train: [44] [3000/6250] eta: 0:09:05 lr: 0.000079 grad: 0.1621 (0.1917) loss: 0.6969 (0.6936) time: 0.1576 data: 0.0673 max mem: 9377 +Train: [44] [3100/6250] eta: 0:08:51 lr: 0.000079 grad: 0.1691 (0.1908) loss: 0.7031 (0.6938) time: 0.2075 data: 0.1275 max mem: 9377 +Train: [44] [3200/6250] eta: 0:08:36 lr: 0.000079 grad: 0.1597 (0.1899) loss: 0.6970 (0.6940) time: 0.1814 data: 0.0944 max mem: 9377 +Train: [44] [3300/6250] eta: 0:08:21 lr: 0.000079 grad: 0.1566 (0.1891) loss: 0.7077 (0.6943) time: 0.2136 data: 0.1137 max mem: 9377 +Train: [44] [3400/6250] eta: 0:08:05 lr: 0.000079 grad: 0.1586 (0.1882) loss: 0.6894 (0.6946) time: 0.1896 data: 0.0966 max mem: 9377 +Train: [44] [3500/6250] eta: 0:07:48 lr: 0.000079 grad: 0.1594 (0.1875) loss: 0.6833 (0.6948) time: 0.1778 data: 0.0856 max mem: 9377 +Train: [44] [3600/6250] eta: 0:07:31 lr: 0.000079 grad: 0.1535 (0.1867) loss: 0.7027 (0.6948) time: 0.1620 data: 0.0636 max mem: 9377 +Train: [44] [3700/6250] eta: 0:07:14 lr: 0.000079 grad: 0.1616 (0.1860) loss: 0.7060 (0.6950) time: 0.1545 data: 0.0629 max mem: 9377 +Train: [44] [3800/6250] eta: 0:06:57 lr: 0.000079 grad: 0.1590 (0.1854) loss: 0.7159 (0.6952) time: 0.1491 data: 0.0519 max mem: 9377 +Train: [44] [3900/6250] eta: 0:06:40 lr: 0.000079 grad: 0.1572 (0.1846) loss: 0.7052 (0.6954) time: 0.1771 data: 0.0933 max mem: 9377 +Train: [44] [4000/6250] eta: 0:06:22 lr: 0.000079 grad: 0.1641 (0.1841) loss: 0.6941 (0.6955) time: 0.1647 data: 0.0752 max mem: 9377 +Train: [44] [4100/6250] eta: 0:06:04 lr: 0.000079 grad: 0.1523 (0.1835) loss: 0.6972 (0.6956) time: 0.1600 data: 0.0797 max mem: 9377 +Train: [44] [4200/6250] eta: 0:05:47 lr: 0.000078 grad: 0.1596 (0.1830) loss: 0.7014 (0.6958) time: 0.1481 data: 0.0603 max mem: 9377 +Train: [44] [4300/6250] eta: 0:05:29 lr: 0.000078 grad: 0.1628 (0.1825) loss: 0.6993 (0.6961) time: 0.1457 data: 0.0582 max mem: 9377 +Train: [44] [4400/6250] eta: 0:05:12 lr: 0.000078 grad: 0.1634 (0.1820) loss: 0.6968 (0.6962) time: 0.1745 data: 0.0887 max mem: 9377 +Train: [44] [4500/6250] eta: 0:04:55 lr: 0.000078 grad: 0.1576 (0.1815) loss: 0.7041 (0.6964) time: 0.1480 data: 0.0638 max mem: 9377 +Train: [44] [4600/6250] eta: 0:04:38 lr: 0.000078 grad: 0.1597 (0.1810) loss: 0.6966 (0.6965) time: 0.1403 data: 0.0369 max mem: 9377 +Train: [44] [4700/6250] eta: 0:04:20 lr: 0.000078 grad: 0.1610 (0.1807) loss: 0.6913 (0.6966) time: 0.1460 data: 0.0553 max mem: 9377 +Train: [44] [4800/6250] eta: 0:04:03 lr: 0.000078 grad: 0.1560 (0.1803) loss: 0.6987 (0.6966) time: 0.1588 data: 0.0699 max mem: 9377 +Train: [44] [4900/6250] eta: 0:03:46 lr: 0.000078 grad: 0.1637 (0.1799) loss: 0.6974 (0.6967) time: 0.1690 data: 0.0837 max mem: 9377 +Train: [44] [5000/6250] eta: 0:03:29 lr: 0.000078 grad: 0.1624 (0.1796) loss: 0.6922 (0.6966) time: 0.1593 data: 0.0693 max mem: 9377 +Train: [44] [5100/6250] eta: 0:03:12 lr: 0.000078 grad: 0.1579 (0.1793) loss: 0.6917 (0.6966) time: 0.1328 data: 0.0327 max mem: 9377 +Train: [44] [5200/6250] eta: 0:02:55 lr: 0.000078 grad: 0.1590 (0.1790) loss: 0.7076 (0.6967) time: 0.1801 data: 0.0906 max mem: 9377 +Train: [44] [5300/6250] eta: 0:02:38 lr: 0.000078 grad: 0.1603 (0.1786) loss: 0.7027 (0.6968) time: 0.1445 data: 0.0557 max mem: 9377 +Train: [44] [5400/6250] eta: 0:02:22 lr: 0.000078 grad: 0.1645 (0.1783) loss: 0.6887 (0.6967) time: 0.1532 data: 0.0694 max mem: 9377 +Train: [44] [5500/6250] eta: 0:02:05 lr: 0.000078 grad: 0.1541 (0.1779) loss: 0.6889 (0.6968) time: 0.1504 data: 0.0649 max mem: 9377 +Train: [44] [5600/6250] eta: 0:01:48 lr: 0.000078 grad: 0.1541 (0.1776) loss: 0.6845 (0.6968) time: 0.1693 data: 0.0849 max mem: 9377 +Train: [44] [5700/6250] eta: 0:01:31 lr: 0.000078 grad: 0.1578 (0.1773) loss: 0.6826 (0.6967) time: 0.1490 data: 0.0585 max mem: 9377 +Train: [44] [5800/6250] eta: 0:01:14 lr: 0.000078 grad: 0.1677 (0.1771) loss: 0.6727 (0.6965) time: 0.1448 data: 0.0513 max mem: 9377 +Train: [44] [5900/6250] eta: 0:00:58 lr: 0.000078 grad: 0.1619 (0.1768) loss: 0.6890 (0.6964) time: 0.1764 data: 0.0930 max mem: 9377 +Train: [44] [6000/6250] eta: 0:00:41 lr: 0.000078 grad: 0.1684 (0.1767) loss: 0.6810 (0.6962) time: 0.1626 data: 0.0840 max mem: 9377 +Train: [44] [6100/6250] eta: 0:00:24 lr: 0.000078 grad: 0.1684 (0.1764) loss: 0.6845 (0.6962) time: 0.1309 data: 0.0430 max mem: 9377 +Train: [44] [6200/6250] eta: 0:00:08 lr: 0.000078 grad: 0.1651 (0.1762) loss: 0.6961 (0.6961) time: 0.1445 data: 0.0604 max mem: 9377 +Train: [44] [6249/6250] eta: 0:00:00 lr: 0.000078 grad: 0.1573 (0.1761) loss: 0.6999 (0.6960) time: 0.1586 data: 0.0699 max mem: 9377 +Train: [44] Total time: 0:17:21 (0.1667 s / it) +Averaged stats: lr: 0.000078 grad: 0.1573 (0.1761) loss: 0.6999 (0.6960) +Eval (hcp-train-subset): [44] [ 0/62] eta: 0:05:03 loss: 0.8791 (0.8791) time: 4.8903 data: 4.8584 max mem: 9377 +Eval (hcp-train-subset): [44] [61/62] eta: 0:00:00 loss: 0.8882 (0.8926) time: 0.1393 data: 0.1141 max mem: 9377 +Eval (hcp-train-subset): [44] Total time: 0:00:14 (0.2329 s / it) +Averaged stats (hcp-train-subset): loss: 0.8882 (0.8926) +Making plots (hcp-train-subset): example=37 +Eval (hcp-val): [44] [ 0/62] eta: 0:05:38 loss: 0.8954 (0.8954) time: 5.4604 data: 5.4294 max mem: 9377 +Eval (hcp-val): [44] [61/62] eta: 0:00:00 loss: 0.8905 (0.8922) time: 0.1388 data: 0.1134 max mem: 9377 +Eval (hcp-val): [44] Total time: 0:00:14 (0.2389 s / it) +Averaged stats (hcp-val): loss: 0.8905 (0.8922) +Making plots (hcp-val): example=5 +Eval (nsd-val): [44] [ 0/62] eta: 0:04:32 loss: 0.8720 (0.8720) time: 4.3923 data: 4.3117 max mem: 9377 +Eval (nsd-val): [44] [61/62] eta: 0:00:00 loss: 0.8760 (0.8744) time: 0.1450 data: 0.1199 max mem: 9377 +Eval (nsd-val): [44] Total time: 0:00:14 (0.2294 s / it) +Averaged stats (nsd-val): loss: 0.8760 (0.8744) +Making plots (nsd-val): example=7 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-00044.pth +Train: [45] [ 0/6250] eta: 11:48:34 lr: 0.000078 grad: 0.3016 (0.3016) loss: 0.6368 (0.6368) time: 6.8022 data: 6.6585 max mem: 9377 +Train: [45] [ 100/6250] eta: 0:22:45 lr: 0.000078 grad: 0.3281 (0.3309) loss: 0.6912 (0.7193) time: 0.1644 data: 0.0597 max mem: 9377 +Train: [45] [ 200/6250] eta: 0:19:38 lr: 0.000078 grad: 0.3634 (0.3496) loss: 0.6812 (0.7024) time: 0.1750 data: 0.0782 max mem: 9377 +Train: [45] [ 300/6250] eta: 0:18:10 lr: 0.000078 grad: 0.3015 (0.3396) loss: 0.6831 (0.6950) time: 0.1533 data: 0.0572 max mem: 9377 +Train: [45] [ 400/6250] eta: 0:17:25 lr: 0.000078 grad: 0.2303 (0.3190) loss: 0.6559 (0.6911) time: 0.1882 data: 0.0941 max mem: 9377 +Train: [45] [ 500/6250] eta: 0:17:03 lr: 0.000078 grad: 0.2150 (0.3005) loss: 0.6828 (0.6890) time: 0.1771 data: 0.0931 max mem: 9377 +Train: [45] [ 600/6250] eta: 0:16:46 lr: 0.000078 grad: 0.1626 (0.2810) loss: 0.6834 (0.6884) time: 0.1561 data: 0.0742 max mem: 9377 +Train: [45] [ 700/6250] eta: 0:16:27 lr: 0.000078 grad: 0.1678 (0.2660) loss: 0.6822 (0.6884) time: 0.1400 data: 0.0491 max mem: 9377 +Train: [45] [ 800/6250] eta: 0:16:01 lr: 0.000078 grad: 0.1582 (0.2534) loss: 0.6985 (0.6890) time: 0.1502 data: 0.0591 max mem: 9377 +Train: [45] [ 900/6250] eta: 0:15:55 lr: 0.000078 grad: 0.1557 (0.2437) loss: 0.7057 (0.6896) time: 0.1974 data: 0.1017 max mem: 9377 +Train: [45] [1000/6250] eta: 0:15:30 lr: 0.000078 grad: 0.1631 (0.2355) loss: 0.7000 (0.6905) time: 0.1511 data: 0.0581 max mem: 9377 +Train: [45] [1100/6250] eta: 0:15:04 lr: 0.000077 grad: 0.1591 (0.2287) loss: 0.6817 (0.6911) time: 0.1527 data: 0.0529 max mem: 9377 +Train: [45] [1200/6250] eta: 0:14:39 lr: 0.000077 grad: 0.1593 (0.2231) loss: 0.7114 (0.6918) time: 0.1447 data: 0.0545 max mem: 9377 +Train: [45] [1300/6250] eta: 0:14:13 lr: 0.000077 grad: 0.1537 (0.2182) loss: 0.6943 (0.6921) time: 0.1391 data: 0.0484 max mem: 9377 +Train: [45] [1400/6250] eta: 0:13:48 lr: 0.000077 grad: 0.1611 (0.2142) loss: 0.6859 (0.6921) time: 0.1448 data: 0.0614 max mem: 9377 +Train: [45] [1500/6250] eta: 0:13:26 lr: 0.000077 grad: 0.1617 (0.2107) loss: 0.6919 (0.6919) time: 0.1549 data: 0.0592 max mem: 9377 +Train: [45] [1600/6250] eta: 0:13:08 lr: 0.000077 grad: 0.1542 (0.2077) loss: 0.7015 (0.6919) time: 0.1899 data: 0.1100 max mem: 9377 +Train: [45] [1700/6250] eta: 0:12:50 lr: 0.000077 grad: 0.1607 (0.2050) loss: 0.6785 (0.6917) time: 0.1659 data: 0.0824 max mem: 9377 +Train: [45] [1800/6250] eta: 0:12:29 lr: 0.000077 grad: 0.1576 (0.2028) loss: 0.6855 (0.6915) time: 0.1320 data: 0.0509 max mem: 9377 +Train: [45] [1900/6250] eta: 0:12:11 lr: 0.000077 grad: 0.1584 (0.2005) loss: 0.6903 (0.6916) time: 0.1409 data: 0.0553 max mem: 9377 +Train: [45] [2000/6250] eta: 0:11:50 lr: 0.000077 grad: 0.1621 (0.1985) loss: 0.6828 (0.6916) time: 0.1480 data: 0.0615 max mem: 9377 +Train: [45] [2100/6250] eta: 0:11:35 lr: 0.000077 grad: 0.1599 (0.1967) loss: 0.6920 (0.6917) time: 0.1599 data: 0.0615 max mem: 9377 +Train: [45] [2200/6250] eta: 0:11:19 lr: 0.000077 grad: 0.1571 (0.1950) loss: 0.7009 (0.6917) time: 0.1748 data: 0.0776 max mem: 9377 +Train: [45] [2300/6250] eta: 0:11:04 lr: 0.000077 grad: 0.1551 (0.1935) loss: 0.7019 (0.6917) time: 0.1648 data: 0.0693 max mem: 9377 +Train: [45] [2400/6250] eta: 0:10:45 lr: 0.000077 grad: 0.1545 (0.1922) loss: 0.6935 (0.6915) time: 0.1709 data: 0.0809 max mem: 9377 +Train: [45] [2500/6250] eta: 0:10:26 lr: 0.000077 grad: 0.1656 (0.1911) loss: 0.6898 (0.6914) time: 0.1554 data: 0.0567 max mem: 9377 +Train: [45] [2600/6250] eta: 0:10:08 lr: 0.000077 grad: 0.1547 (0.1899) loss: 0.6973 (0.6916) time: 0.1522 data: 0.0645 max mem: 9377 +Train: [45] [2700/6250] eta: 0:09:50 lr: 0.000077 grad: 0.1612 (0.1889) loss: 0.7000 (0.6918) time: 0.1417 data: 0.0482 max mem: 9377 +Train: [45] [2800/6250] eta: 0:09:33 lr: 0.000077 grad: 0.1666 (0.1879) loss: 0.6978 (0.6919) time: 0.1583 data: 0.0732 max mem: 9377 +Train: [45] [2900/6250] eta: 0:09:17 lr: 0.000077 grad: 0.1622 (0.1871) loss: 0.6793 (0.6918) time: 0.1652 data: 0.0775 max mem: 9377 +Train: [45] [3000/6250] eta: 0:09:00 lr: 0.000077 grad: 0.1586 (0.1863) loss: 0.6838 (0.6917) time: 0.1532 data: 0.0697 max mem: 9377 +Train: [45] [3100/6250] eta: 0:08:43 lr: 0.000077 grad: 0.1664 (0.1856) loss: 0.6918 (0.6916) time: 0.1549 data: 0.0803 max mem: 9377 +Train: [45] [3200/6250] eta: 0:08:26 lr: 0.000077 grad: 0.1610 (0.1849) loss: 0.6912 (0.6916) time: 0.1482 data: 0.0663 max mem: 9377 +Train: [45] [3300/6250] eta: 0:08:08 lr: 0.000077 grad: 0.1628 (0.1842) loss: 0.6964 (0.6917) time: 0.1509 data: 0.0713 max mem: 9377 +Train: [45] [3400/6250] eta: 0:07:52 lr: 0.000077 grad: 0.1586 (0.1835) loss: 0.6980 (0.6918) time: 0.1598 data: 0.0740 max mem: 9377 +Train: [45] [3500/6250] eta: 0:07:36 lr: 0.000077 grad: 0.1653 (0.1831) loss: 0.6799 (0.6917) time: 0.1645 data: 0.0727 max mem: 9377 +Train: [45] [3600/6250] eta: 0:07:19 lr: 0.000077 grad: 0.1591 (0.1826) loss: 0.7000 (0.6919) time: 0.1476 data: 0.0532 max mem: 9377 +Train: [45] [3700/6250] eta: 0:07:02 lr: 0.000077 grad: 0.1656 (0.1821) loss: 0.6857 (0.6920) time: 0.1700 data: 0.0797 max mem: 9377 +Train: [45] [3800/6250] eta: 0:06:45 lr: 0.000077 grad: 0.1638 (0.1816) loss: 0.6939 (0.6920) time: 0.1726 data: 0.0785 max mem: 9377 +Train: [45] [3900/6250] eta: 0:06:28 lr: 0.000077 grad: 0.1625 (0.1811) loss: 0.6873 (0.6920) time: 0.1941 data: 0.1018 max mem: 9377 +Train: [45] [4000/6250] eta: 0:06:11 lr: 0.000077 grad: 0.1587 (0.1807) loss: 0.6941 (0.6919) time: 0.1542 data: 0.0646 max mem: 9377 +Train: [45] [4100/6250] eta: 0:05:54 lr: 0.000077 grad: 0.1704 (0.1803) loss: 0.6804 (0.6919) time: 0.1638 data: 0.0786 max mem: 9377 +Train: [45] [4200/6250] eta: 0:05:37 lr: 0.000076 grad: 0.1617 (0.1799) loss: 0.6911 (0.6919) time: 0.1692 data: 0.0833 max mem: 9377 +Train: [45] [4300/6250] eta: 0:05:20 lr: 0.000076 grad: 0.1626 (0.1795) loss: 0.6864 (0.6919) time: 0.1643 data: 0.0792 max mem: 9377 +Train: [45] [4400/6250] eta: 0:05:03 lr: 0.000076 grad: 0.1674 (0.1792) loss: 0.6801 (0.6918) time: 0.1389 data: 0.0490 max mem: 9377 +Train: [45] [4500/6250] eta: 0:04:47 lr: 0.000076 grad: 0.1633 (0.1789) loss: 0.6846 (0.6917) time: 0.1528 data: 0.0741 max mem: 9377 +Train: [45] [4600/6250] eta: 0:04:30 lr: 0.000076 grad: 0.1655 (0.1787) loss: 0.6968 (0.6917) time: 0.1613 data: 0.0747 max mem: 9377 +Train: [45] [4700/6250] eta: 0:04:13 lr: 0.000076 grad: 0.1632 (0.1784) loss: 0.6788 (0.6916) time: 0.1607 data: 0.0744 max mem: 9377 +Train: [45] [4800/6250] eta: 0:03:56 lr: 0.000076 grad: 0.1706 (0.1782) loss: 0.6831 (0.6915) time: 0.1502 data: 0.0564 max mem: 9377 +Train: [45] [4900/6250] eta: 0:03:40 lr: 0.000076 grad: 0.1624 (0.1780) loss: 0.6832 (0.6915) time: 0.1566 data: 0.0711 max mem: 9377 +Train: [45] [5000/6250] eta: 0:03:24 lr: 0.000076 grad: 0.1602 (0.1777) loss: 0.6931 (0.6915) time: 0.1648 data: 0.0835 max mem: 9377 +Train: [45] [5100/6250] eta: 0:03:07 lr: 0.000076 grad: 0.1661 (0.1775) loss: 0.6735 (0.6914) time: 0.1464 data: 0.0566 max mem: 9377 +Train: [45] [5200/6250] eta: 0:02:51 lr: 0.000076 grad: 0.1672 (0.1773) loss: 0.6699 (0.6912) time: 0.1809 data: 0.0988 max mem: 9377 +Train: [45] [5300/6250] eta: 0:02:34 lr: 0.000076 grad: 0.1661 (0.1772) loss: 0.6888 (0.6911) time: 0.1405 data: 0.0548 max mem: 9377 +Train: [45] [5400/6250] eta: 0:02:18 lr: 0.000076 grad: 0.1601 (0.1770) loss: 0.6864 (0.6909) time: 0.1584 data: 0.0753 max mem: 9377 +Train: [45] [5500/6250] eta: 0:02:02 lr: 0.000076 grad: 0.1594 (0.1767) loss: 0.6787 (0.6908) time: 0.1410 data: 0.0485 max mem: 9377 +Train: [45] [5600/6250] eta: 0:01:45 lr: 0.000076 grad: 0.1663 (0.1766) loss: 0.6835 (0.6907) time: 0.1969 data: 0.1115 max mem: 9377 +Train: [45] [5700/6250] eta: 0:01:29 lr: 0.000076 grad: 0.1554 (0.1764) loss: 0.6878 (0.6906) time: 0.1514 data: 0.0682 max mem: 9377 +Train: [45] [5800/6250] eta: 0:01:13 lr: 0.000076 grad: 0.1661 (0.1762) loss: 0.6880 (0.6905) time: 0.1724 data: 0.0903 max mem: 9377 +Train: [45] [5900/6250] eta: 0:00:56 lr: 0.000076 grad: 0.1719 (0.1760) loss: 0.6848 (0.6905) time: 0.1046 data: 0.0035 max mem: 9377 +Train: [45] [6000/6250] eta: 0:00:40 lr: 0.000076 grad: 0.1628 (0.1758) loss: 0.6878 (0.6904) time: 0.1223 data: 0.0284 max mem: 9377 +Train: [45] [6100/6250] eta: 0:00:24 lr: 0.000076 grad: 0.1617 (0.1757) loss: 0.7050 (0.6904) time: 0.1625 data: 0.0758 max mem: 9377 +Train: [45] [6200/6250] eta: 0:00:08 lr: 0.000076 grad: 0.1609 (0.1755) loss: 0.6882 (0.6903) time: 0.1331 data: 0.0536 max mem: 9377 +Train: [45] [6249/6250] eta: 0:00:00 lr: 0.000076 grad: 0.1630 (0.1754) loss: 0.6967 (0.6903) time: 0.1268 data: 0.0335 max mem: 9377 +Train: [45] Total time: 0:16:58 (0.1630 s / it) +Averaged stats: lr: 0.000076 grad: 0.1630 (0.1754) loss: 0.6967 (0.6903) +Eval (hcp-train-subset): [45] [ 0/62] eta: 0:04:03 loss: 0.8813 (0.8813) time: 3.9339 data: 3.8019 max mem: 9377 +Eval (hcp-train-subset): [45] [61/62] eta: 0:00:00 loss: 0.8905 (0.8926) time: 0.1409 data: 0.1146 max mem: 9377 +Eval (hcp-train-subset): [45] Total time: 0:00:15 (0.2536 s / it) +Averaged stats (hcp-train-subset): loss: 0.8905 (0.8926) +Eval (hcp-val): [45] [ 0/62] eta: 0:06:31 loss: 0.8902 (0.8902) time: 6.3131 data: 6.2800 max mem: 9377 +Eval (hcp-val): [45] [61/62] eta: 0:00:00 loss: 0.8902 (0.8917) time: 0.1529 data: 0.1269 max mem: 9377 +Eval (hcp-val): [45] Total time: 0:00:16 (0.2612 s / it) +Averaged stats (hcp-val): loss: 0.8902 (0.8917) +Eval (nsd-val): [45] [ 0/62] eta: 0:06:02 loss: 0.8621 (0.8621) time: 5.8484 data: 5.8156 max mem: 9377 +Eval (nsd-val): [45] [61/62] eta: 0:00:00 loss: 0.8731 (0.8754) time: 0.1720 data: 0.1460 max mem: 9377 +Eval (nsd-val): [45] Total time: 0:00:16 (0.2681 s / it) +Averaged stats (nsd-val): loss: 0.8731 (0.8754) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [46] [ 0/6250] eta: 10:52:21 lr: 0.000076 grad: 0.3141 (0.3141) loss: 0.6663 (0.6663) time: 6.2627 data: 5.9243 max mem: 9377 +Train: [46] [ 100/6250] eta: 0:26:00 lr: 0.000076 grad: 0.2537 (0.2931) loss: 0.7279 (0.7374) time: 0.1486 data: 0.0096 max mem: 9377 +Train: [46] [ 200/6250] eta: 0:21:40 lr: 0.000076 grad: 0.2911 (0.2956) loss: 0.7134 (0.7237) time: 0.1913 data: 0.0899 max mem: 9377 +Train: [46] [ 300/6250] eta: 0:19:42 lr: 0.000076 grad: 0.2158 (0.3044) loss: 0.6912 (0.7195) time: 0.1688 data: 0.0706 max mem: 9377 +Train: [46] [ 400/6250] eta: 0:18:44 lr: 0.000076 grad: 0.2016 (0.2796) loss: 0.6978 (0.7171) time: 0.1853 data: 0.0797 max mem: 9377 +Train: [46] [ 500/6250] eta: 0:17:53 lr: 0.000076 grad: 0.1927 (0.2637) loss: 0.6918 (0.7137) time: 0.1827 data: 0.0875 max mem: 9377 +Train: [46] [ 600/6250] eta: 0:17:19 lr: 0.000076 grad: 0.1923 (0.2509) loss: 0.6863 (0.7129) time: 0.1710 data: 0.0802 max mem: 9377 +Train: [46] [ 700/6250] eta: 0:16:45 lr: 0.000076 grad: 0.1754 (0.2408) loss: 0.6856 (0.7112) time: 0.1415 data: 0.0489 max mem: 9377 +Train: [46] [ 800/6250] eta: 0:16:12 lr: 0.000076 grad: 0.1862 (0.2331) loss: 0.6963 (0.7092) time: 0.1548 data: 0.0710 max mem: 9377 +Train: [46] [ 900/6250] eta: 0:15:50 lr: 0.000076 grad: 0.1641 (0.2268) loss: 0.6998 (0.7083) time: 0.1425 data: 0.0541 max mem: 9377 +Train: [46] [1000/6250] eta: 0:15:27 lr: 0.000076 grad: 0.1601 (0.2205) loss: 0.7008 (0.7076) time: 0.1712 data: 0.0788 max mem: 9377 +Train: [46] [1100/6250] eta: 0:14:57 lr: 0.000075 grad: 0.1601 (0.2153) loss: 0.7038 (0.7067) time: 0.1326 data: 0.0464 max mem: 9377 +Train: [46] [1200/6250] eta: 0:14:31 lr: 0.000075 grad: 0.1574 (0.2109) loss: 0.6877 (0.7060) time: 0.1460 data: 0.0593 max mem: 9377 +Train: [46] [1300/6250] eta: 0:14:06 lr: 0.000075 grad: 0.1629 (0.2075) loss: 0.6991 (0.7050) time: 0.1603 data: 0.0771 max mem: 9377 +Train: [46] [1400/6250] eta: 0:13:46 lr: 0.000075 grad: 0.1649 (0.2043) loss: 0.6936 (0.7043) time: 0.1471 data: 0.0620 max mem: 9377 +Train: [46] [1500/6250] eta: 0:13:26 lr: 0.000075 grad: 0.1638 (0.2017) loss: 0.6818 (0.7035) time: 0.1500 data: 0.0654 max mem: 9377 +Train: [46] [1600/6250] eta: 0:13:06 lr: 0.000075 grad: 0.1603 (0.1993) loss: 0.6912 (0.7028) time: 0.1671 data: 0.0843 max mem: 9377 +Train: [46] [1700/6250] eta: 0:12:47 lr: 0.000075 grad: 0.1663 (0.1971) loss: 0.6929 (0.7023) time: 0.1990 data: 0.1141 max mem: 9377 +Train: [46] [1800/6250] eta: 0:12:31 lr: 0.000075 grad: 0.1629 (0.1952) loss: 0.6772 (0.7018) time: 0.1493 data: 0.0620 max mem: 9377 +Train: [46] [1900/6250] eta: 0:12:12 lr: 0.000075 grad: 0.1625 (0.1935) loss: 0.6965 (0.7015) time: 0.1524 data: 0.0666 max mem: 9377 +Train: [46] [2000/6250] eta: 0:11:56 lr: 0.000075 grad: 0.1579 (0.1920) loss: 0.6983 (0.7011) time: 0.1525 data: 0.0656 max mem: 9377 +Train: [46] [2100/6250] eta: 0:11:40 lr: 0.000075 grad: 0.1613 (0.1907) loss: 0.6925 (0.7008) time: 0.1958 data: 0.1089 max mem: 9377 +Train: [46] [2200/6250] eta: 0:11:22 lr: 0.000075 grad: 0.1617 (0.1895) loss: 0.7038 (0.7008) time: 0.1503 data: 0.0568 max mem: 9377 +Train: [46] [2300/6250] eta: 0:11:05 lr: 0.000075 grad: 0.1613 (0.1883) loss: 0.6856 (0.7006) time: 0.1684 data: 0.0782 max mem: 9377 +Train: [46] [2400/6250] eta: 0:10:48 lr: 0.000075 grad: 0.1667 (0.1875) loss: 0.6965 (0.7006) time: 0.1879 data: 0.1071 max mem: 9377 +Train: [46] [2500/6250] eta: 0:10:31 lr: 0.000075 grad: 0.1648 (0.1865) loss: 0.6859 (0.7003) time: 0.1661 data: 0.0772 max mem: 9377 +Train: [46] [2600/6250] eta: 0:10:13 lr: 0.000075 grad: 0.1603 (0.1856) loss: 0.6946 (0.7002) time: 0.1752 data: 0.0806 max mem: 9377 +Train: [46] [2700/6250] eta: 0:09:55 lr: 0.000075 grad: 0.1645 (0.1849) loss: 0.6943 (0.7000) time: 0.1703 data: 0.0799 max mem: 9377 +Train: [46] [2800/6250] eta: 0:09:38 lr: 0.000075 grad: 0.1783 (0.1843) loss: 0.6868 (0.6998) time: 0.1682 data: 0.0837 max mem: 9377 +Train: [46] [2900/6250] eta: 0:09:20 lr: 0.000075 grad: 0.1633 (0.1837) loss: 0.6929 (0.6996) time: 0.1784 data: 0.0935 max mem: 9377 +Train: [46] [3000/6250] eta: 0:09:01 lr: 0.000075 grad: 0.1572 (0.1831) loss: 0.6950 (0.6994) time: 0.1513 data: 0.0596 max mem: 9377 +Train: [46] [3100/6250] eta: 0:08:46 lr: 0.000075 grad: 0.1653 (0.1825) loss: 0.6875 (0.6992) time: 0.1561 data: 0.0744 max mem: 9377 +Train: [46] [3200/6250] eta: 0:08:29 lr: 0.000075 grad: 0.1620 (0.1819) loss: 0.6913 (0.6989) time: 0.1501 data: 0.0666 max mem: 9377 +Train: [46] [3300/6250] eta: 0:08:12 lr: 0.000075 grad: 0.1647 (0.1815) loss: 0.6789 (0.6985) time: 0.1763 data: 0.0921 max mem: 9377 +Train: [46] [3400/6250] eta: 0:07:55 lr: 0.000075 grad: 0.1657 (0.1811) loss: 0.6840 (0.6983) time: 0.1699 data: 0.0856 max mem: 9377 +Train: [46] [3500/6250] eta: 0:07:39 lr: 0.000075 grad: 0.1643 (0.1806) loss: 0.6940 (0.6981) time: 0.1735 data: 0.0830 max mem: 9377 +Train: [46] [3600/6250] eta: 0:07:22 lr: 0.000075 grad: 0.1624 (0.1802) loss: 0.6988 (0.6979) time: 0.1649 data: 0.0709 max mem: 9377 +Train: [46] [3700/6250] eta: 0:07:05 lr: 0.000075 grad: 0.1638 (0.1798) loss: 0.6875 (0.6978) time: 0.1622 data: 0.0700 max mem: 9377 +Train: [46] [3800/6250] eta: 0:06:49 lr: 0.000075 grad: 0.1581 (0.1793) loss: 0.6905 (0.6976) time: 0.1605 data: 0.0680 max mem: 9377 +Train: [46] [3900/6250] eta: 0:06:31 lr: 0.000075 grad: 0.1615 (0.1789) loss: 0.6878 (0.6974) time: 0.1462 data: 0.0411 max mem: 9377 +Train: [46] [4000/6250] eta: 0:06:14 lr: 0.000075 grad: 0.1729 (0.1786) loss: 0.6817 (0.6972) time: 0.1534 data: 0.0728 max mem: 9377 +Train: [46] [4100/6250] eta: 0:05:57 lr: 0.000075 grad: 0.1627 (0.1783) loss: 0.6839 (0.6970) time: 0.1360 data: 0.0436 max mem: 9377 +Train: [46] [4200/6250] eta: 0:05:40 lr: 0.000074 grad: 0.1738 (0.1780) loss: 0.6708 (0.6969) time: 0.1635 data: 0.0848 max mem: 9377 +Train: [46] [4300/6250] eta: 0:05:23 lr: 0.000074 grad: 0.1669 (0.1778) loss: 0.6708 (0.6965) time: 0.1717 data: 0.0902 max mem: 9377 +Train: [46] [4400/6250] eta: 0:05:06 lr: 0.000074 grad: 0.1670 (0.1776) loss: 0.6801 (0.6963) time: 0.1561 data: 0.0740 max mem: 9377 +Train: [46] [4500/6250] eta: 0:04:49 lr: 0.000074 grad: 0.1683 (0.1774) loss: 0.6768 (0.6960) time: 0.1424 data: 0.0512 max mem: 9377 +Train: [46] [4600/6250] eta: 0:04:32 lr: 0.000074 grad: 0.1621 (0.1773) loss: 0.6805 (0.6956) time: 0.1517 data: 0.0630 max mem: 9377 +Train: [46] [4700/6250] eta: 0:04:16 lr: 0.000074 grad: 0.1606 (0.1770) loss: 0.6818 (0.6954) time: 0.1611 data: 0.0800 max mem: 9377 +Train: [46] [4800/6250] eta: 0:04:00 lr: 0.000074 grad: 0.1664 (0.1768) loss: 0.6976 (0.6953) time: 0.2048 data: 0.1229 max mem: 9377 +Train: [46] [4900/6250] eta: 0:03:43 lr: 0.000074 grad: 0.1628 (0.1765) loss: 0.6918 (0.6953) time: 0.1547 data: 0.0774 max mem: 9377 +Train: [46] [5000/6250] eta: 0:03:26 lr: 0.000074 grad: 0.1590 (0.1763) loss: 0.6857 (0.6952) time: 0.2104 data: 0.1299 max mem: 9377 +Train: [46] [5100/6250] eta: 0:03:09 lr: 0.000074 grad: 0.1600 (0.1760) loss: 0.7018 (0.6952) time: 0.1130 data: 0.0239 max mem: 9377 +Train: [46] [5200/6250] eta: 0:02:53 lr: 0.000074 grad: 0.1670 (0.1758) loss: 0.6919 (0.6951) time: 0.1480 data: 0.0646 max mem: 9377 +Train: [46] [5300/6250] eta: 0:02:36 lr: 0.000074 grad: 0.1623 (0.1756) loss: 0.6745 (0.6949) time: 0.1753 data: 0.0946 max mem: 9377 +Train: [46] [5400/6250] eta: 0:02:20 lr: 0.000074 grad: 0.1594 (0.1755) loss: 0.7079 (0.6948) time: 0.2068 data: 0.1217 max mem: 9377 +Train: [46] [5500/6250] eta: 0:02:03 lr: 0.000074 grad: 0.1565 (0.1753) loss: 0.6919 (0.6947) time: 0.1565 data: 0.0619 max mem: 9377 +Train: [46] [5600/6250] eta: 0:01:46 lr: 0.000074 grad: 0.1702 (0.1752) loss: 0.6749 (0.6945) time: 0.1441 data: 0.0525 max mem: 9377 +Train: [46] [5700/6250] eta: 0:01:30 lr: 0.000074 grad: 0.1617 (0.1751) loss: 0.6819 (0.6943) time: 0.1783 data: 0.0951 max mem: 9377 +Train: [46] [5800/6250] eta: 0:01:13 lr: 0.000074 grad: 0.1662 (0.1749) loss: 0.6802 (0.6943) time: 0.1803 data: 0.0963 max mem: 9377 +Train: [46] [5900/6250] eta: 0:00:57 lr: 0.000074 grad: 0.1717 (0.1747) loss: 0.6879 (0.6942) time: 0.1732 data: 0.0835 max mem: 9377 +Train: [46] [6000/6250] eta: 0:00:40 lr: 0.000074 grad: 0.1695 (0.1746) loss: 0.6869 (0.6941) time: 0.1343 data: 0.0499 max mem: 9377 +Train: [46] [6100/6250] eta: 0:00:24 lr: 0.000074 grad: 0.1649 (0.1745) loss: 0.6862 (0.6940) time: 0.1546 data: 0.0669 max mem: 9377 +Train: [46] [6200/6250] eta: 0:00:08 lr: 0.000074 grad: 0.1677 (0.1744) loss: 0.6905 (0.6941) time: 0.1559 data: 0.0738 max mem: 9377 +Train: [46] [6249/6250] eta: 0:00:00 lr: 0.000074 grad: 0.1598 (0.1743) loss: 0.6890 (0.6940) time: 0.1733 data: 0.0887 max mem: 9377 +Train: [46] Total time: 0:17:11 (0.1650 s / it) +Averaged stats: lr: 0.000074 grad: 0.1598 (0.1743) loss: 0.6890 (0.6940) +Eval (hcp-train-subset): [46] [ 0/62] eta: 0:05:09 loss: 0.8822 (0.8822) time: 4.9843 data: 4.9149 max mem: 9377 +Eval (hcp-train-subset): [46] [61/62] eta: 0:00:00 loss: 0.8926 (0.8933) time: 0.1274 data: 0.1007 max mem: 9377 +Eval (hcp-train-subset): [46] Total time: 0:00:14 (0.2337 s / it) +Averaged stats (hcp-train-subset): loss: 0.8926 (0.8933) +Eval (hcp-val): [46] [ 0/62] eta: 0:04:04 loss: 0.8895 (0.8895) time: 3.9494 data: 3.8555 max mem: 9377 +Eval (hcp-val): [46] [61/62] eta: 0:00:00 loss: 0.8928 (0.8944) time: 0.1440 data: 0.1171 max mem: 9377 +Eval (hcp-val): [46] Total time: 0:00:14 (0.2347 s / it) +Averaged stats (hcp-val): loss: 0.8928 (0.8944) +Eval (nsd-val): [46] [ 0/62] eta: 0:03:56 loss: 0.8806 (0.8806) time: 3.8137 data: 3.7434 max mem: 9377 +Eval (nsd-val): [46] [61/62] eta: 0:00:00 loss: 0.8810 (0.8833) time: 0.1483 data: 0.1213 max mem: 9377 +Eval (nsd-val): [46] Total time: 0:00:14 (0.2341 s / it) +Averaged stats (nsd-val): loss: 0.8810 (0.8833) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [47] [ 0/6250] eta: 7:40:01 lr: 0.000074 grad: 0.1917 (0.1917) loss: 0.7855 (0.7855) time: 4.4162 data: 4.1786 max mem: 9377 +Train: [47] [ 100/6250] eta: 0:22:42 lr: 0.000074 grad: 0.3203 (0.3301) loss: 0.6788 (0.7147) time: 0.1611 data: 0.0600 max mem: 9377 +Train: [47] [ 200/6250] eta: 0:19:36 lr: 0.000074 grad: 0.3258 (0.3523) loss: 0.6750 (0.7046) time: 0.1439 data: 0.0538 max mem: 9377 +Train: [47] [ 300/6250] eta: 0:18:15 lr: 0.000074 grad: 0.2421 (0.3320) loss: 0.7038 (0.6992) time: 0.1725 data: 0.0797 max mem: 9377 +Train: [47] [ 400/6250] eta: 0:17:21 lr: 0.000074 grad: 0.2006 (0.3045) loss: 0.7027 (0.7000) time: 0.1470 data: 0.0492 max mem: 9377 +Train: [47] [ 500/6250] eta: 0:16:45 lr: 0.000074 grad: 0.1703 (0.2806) loss: 0.7081 (0.6998) time: 0.1800 data: 0.0754 max mem: 9377 +Train: [47] [ 600/6250] eta: 0:16:28 lr: 0.000074 grad: 0.1734 (0.2632) loss: 0.6920 (0.6975) time: 0.1708 data: 0.0725 max mem: 9377 +Train: [47] [ 700/6250] eta: 0:16:19 lr: 0.000074 grad: 0.1653 (0.2498) loss: 0.6876 (0.6976) time: 0.1730 data: 0.0942 max mem: 9377 +Train: [47] [ 800/6250] eta: 0:16:12 lr: 0.000074 grad: 0.1652 (0.2393) loss: 0.6915 (0.6980) time: 0.2053 data: 0.1186 max mem: 9377 +Train: [47] [ 900/6250] eta: 0:15:59 lr: 0.000074 grad: 0.1720 (0.2317) loss: 0.6915 (0.6982) time: 0.1767 data: 0.0753 max mem: 9377 +Train: [47] [1000/6250] eta: 0:15:47 lr: 0.000073 grad: 0.1676 (0.2255) loss: 0.6925 (0.6981) time: 0.1872 data: 0.0956 max mem: 9377 +Train: [47] [1100/6250] eta: 0:15:27 lr: 0.000073 grad: 0.1629 (0.2200) loss: 0.6943 (0.6981) time: 0.1798 data: 0.0844 max mem: 9377 +Train: [47] [1200/6250] eta: 0:15:01 lr: 0.000073 grad: 0.1648 (0.2155) loss: 0.6989 (0.6979) time: 0.1655 data: 0.0748 max mem: 9377 +Train: [47] [1300/6250] eta: 0:14:37 lr: 0.000073 grad: 0.1666 (0.2117) loss: 0.7058 (0.6980) time: 0.1660 data: 0.0787 max mem: 9377 +Train: [47] [1400/6250] eta: 0:14:12 lr: 0.000073 grad: 0.1681 (0.2087) loss: 0.6980 (0.6978) time: 0.1490 data: 0.0546 max mem: 9377 +Train: [47] [1500/6250] eta: 0:13:50 lr: 0.000073 grad: 0.1720 (0.2059) loss: 0.6890 (0.6977) time: 0.1633 data: 0.0802 max mem: 9377 +Train: [47] [1600/6250] eta: 0:13:30 lr: 0.000073 grad: 0.1746 (0.2036) loss: 0.6883 (0.6976) time: 0.1675 data: 0.0803 max mem: 9377 +Train: [47] [1700/6250] eta: 0:13:09 lr: 0.000073 grad: 0.1611 (0.2014) loss: 0.6863 (0.6976) time: 0.1713 data: 0.0872 max mem: 9377 +Train: [47] [1800/6250] eta: 0:12:51 lr: 0.000073 grad: 0.1587 (0.1994) loss: 0.6937 (0.6975) time: 0.1286 data: 0.0491 max mem: 9377 +Train: [47] [1900/6250] eta: 0:12:31 lr: 0.000073 grad: 0.1658 (0.1976) loss: 0.6901 (0.6976) time: 0.1721 data: 0.0882 max mem: 9377 +Train: [47] [2000/6250] eta: 0:12:12 lr: 0.000073 grad: 0.1564 (0.1960) loss: 0.7013 (0.6976) time: 0.1563 data: 0.0669 max mem: 9377 +Train: [47] [2100/6250] eta: 0:11:52 lr: 0.000073 grad: 0.1676 (0.1945) loss: 0.7011 (0.6976) time: 0.1620 data: 0.0687 max mem: 9377 +Train: [47] [2200/6250] eta: 0:11:38 lr: 0.000073 grad: 0.1603 (0.1930) loss: 0.6906 (0.6975) time: 0.2012 data: 0.1082 max mem: 9377 +Train: [47] [2300/6250] eta: 0:11:18 lr: 0.000073 grad: 0.1682 (0.1918) loss: 0.6912 (0.6972) time: 0.1557 data: 0.0691 max mem: 9377 +Train: [47] [2400/6250] eta: 0:11:00 lr: 0.000073 grad: 0.1611 (0.1908) loss: 0.6889 (0.6968) time: 0.1707 data: 0.0816 max mem: 9377 +Train: [47] [2500/6250] eta: 0:10:42 lr: 0.000073 grad: 0.1599 (0.1897) loss: 0.6869 (0.6965) time: 0.1810 data: 0.0863 max mem: 9377 +Train: [47] [2600/6250] eta: 0:10:23 lr: 0.000073 grad: 0.1630 (0.1887) loss: 0.6815 (0.6962) time: 0.1691 data: 0.0849 max mem: 9377 +Train: [47] [2700/6250] eta: 0:10:03 lr: 0.000073 grad: 0.1620 (0.1878) loss: 0.6948 (0.6961) time: 0.1481 data: 0.0551 max mem: 9377 +Train: [47] [2800/6250] eta: 0:09:44 lr: 0.000073 grad: 0.1617 (0.1870) loss: 0.6937 (0.6958) time: 0.1579 data: 0.0701 max mem: 9377 +Train: [47] [2900/6250] eta: 0:09:25 lr: 0.000073 grad: 0.1675 (0.1862) loss: 0.6907 (0.6956) time: 0.1548 data: 0.0637 max mem: 9377 +Train: [47] [3000/6250] eta: 0:09:07 lr: 0.000073 grad: 0.1582 (0.1856) loss: 0.6856 (0.6954) time: 0.1266 data: 0.0380 max mem: 9377 +Train: [47] [3100/6250] eta: 0:08:51 lr: 0.000073 grad: 0.1632 (0.1849) loss: 0.6902 (0.6951) time: 0.1655 data: 0.0809 max mem: 9377 +Train: [47] [3200/6250] eta: 0:08:33 lr: 0.000073 grad: 0.1610 (0.1844) loss: 0.6804 (0.6949) time: 0.1947 data: 0.1070 max mem: 9377 +Train: [47] [3300/6250] eta: 0:08:16 lr: 0.000073 grad: 0.1642 (0.1838) loss: 0.6922 (0.6947) time: 0.1570 data: 0.0715 max mem: 9377 +Train: [47] [3400/6250] eta: 0:07:59 lr: 0.000073 grad: 0.1623 (0.1833) loss: 0.6821 (0.6946) time: 0.1895 data: 0.1009 max mem: 9377 +Train: [47] [3500/6250] eta: 0:07:42 lr: 0.000073 grad: 0.1640 (0.1828) loss: 0.6927 (0.6943) time: 0.1585 data: 0.0732 max mem: 9377 +Train: [47] [3600/6250] eta: 0:07:26 lr: 0.000073 grad: 0.1596 (0.1823) loss: 0.6840 (0.6942) time: 0.1857 data: 0.0928 max mem: 9377 +Train: [47] [3700/6250] eta: 0:07:08 lr: 0.000073 grad: 0.1656 (0.1819) loss: 0.6767 (0.6940) time: 0.1424 data: 0.0572 max mem: 9377 +Train: [47] [3800/6250] eta: 0:06:50 lr: 0.000073 grad: 0.1607 (0.1813) loss: 0.6942 (0.6940) time: 0.1483 data: 0.0594 max mem: 9377 +Train: [47] [3900/6250] eta: 0:06:32 lr: 0.000073 grad: 0.1673 (0.1809) loss: 0.6848 (0.6939) time: 0.1540 data: 0.0697 max mem: 9377 +Train: [47] [4000/6250] eta: 0:06:14 lr: 0.000073 grad: 0.1627 (0.1805) loss: 0.6930 (0.6937) time: 0.1616 data: 0.0767 max mem: 9377 +Train: [47] [4100/6250] eta: 0:05:57 lr: 0.000072 grad: 0.1573 (0.1801) loss: 0.6832 (0.6936) time: 0.1492 data: 0.0529 max mem: 9377 +Train: [47] [4200/6250] eta: 0:05:40 lr: 0.000072 grad: 0.1701 (0.1797) loss: 0.6800 (0.6935) time: 0.1961 data: 0.1150 max mem: 9377 +Train: [47] [4300/6250] eta: 0:05:24 lr: 0.000072 grad: 0.1641 (0.1793) loss: 0.6916 (0.6934) time: 0.1835 data: 0.1023 max mem: 9377 +Train: [47] [4400/6250] eta: 0:05:07 lr: 0.000072 grad: 0.1618 (0.1791) loss: 0.6839 (0.6933) time: 0.1497 data: 0.0593 max mem: 9377 +Train: [47] [4500/6250] eta: 0:04:50 lr: 0.000072 grad: 0.1649 (0.1788) loss: 0.6711 (0.6932) time: 0.1602 data: 0.0707 max mem: 9377 +Train: [47] [4600/6250] eta: 0:04:33 lr: 0.000072 grad: 0.1596 (0.1785) loss: 0.6861 (0.6932) time: 0.1560 data: 0.0703 max mem: 9377 +Train: [47] [4700/6250] eta: 0:04:16 lr: 0.000072 grad: 0.1697 (0.1782) loss: 0.6797 (0.6931) time: 0.1551 data: 0.0642 max mem: 9377 +Train: [47] [4800/6250] eta: 0:04:00 lr: 0.000072 grad: 0.1681 (0.1779) loss: 0.6934 (0.6930) time: 0.1611 data: 0.0671 max mem: 9377 +Train: [47] [4900/6250] eta: 0:03:43 lr: 0.000072 grad: 0.1629 (0.1777) loss: 0.6777 (0.6928) time: 0.1307 data: 0.0409 max mem: 9377 +Train: [47] [5000/6250] eta: 0:03:26 lr: 0.000072 grad: 0.1700 (0.1775) loss: 0.6910 (0.6926) time: 0.1613 data: 0.0771 max mem: 9377 +Train: [47] [5100/6250] eta: 0:03:10 lr: 0.000072 grad: 0.1735 (0.1774) loss: 0.6744 (0.6923) time: 0.1651 data: 0.0780 max mem: 9377 +Train: [47] [5200/6250] eta: 0:02:53 lr: 0.000072 grad: 0.1704 (0.1773) loss: 0.6822 (0.6921) time: 0.1673 data: 0.0831 max mem: 9377 +Train: [47] [5300/6250] eta: 0:02:37 lr: 0.000072 grad: 0.1668 (0.1771) loss: 0.6752 (0.6919) time: 0.1367 data: 0.0507 max mem: 9377 +Train: [47] [5400/6250] eta: 0:02:20 lr: 0.000072 grad: 0.1716 (0.1770) loss: 0.6788 (0.6917) time: 0.1647 data: 0.0683 max mem: 9377 +Train: [47] [5500/6250] eta: 0:02:03 lr: 0.000072 grad: 0.1575 (0.1767) loss: 0.6958 (0.6917) time: 0.1599 data: 0.0715 max mem: 9377 +Train: [47] [5600/6250] eta: 0:01:47 lr: 0.000072 grad: 0.1654 (0.1765) loss: 0.6912 (0.6916) time: 0.1047 data: 0.0002 max mem: 9377 +Train: [47] [5700/6250] eta: 0:01:30 lr: 0.000072 grad: 0.1647 (0.1764) loss: 0.6797 (0.6915) time: 0.1557 data: 0.0697 max mem: 9377 +Train: [47] [5800/6250] eta: 0:01:14 lr: 0.000072 grad: 0.1665 (0.1763) loss: 0.6944 (0.6915) time: 0.1783 data: 0.0927 max mem: 9377 +Train: [47] [5900/6250] eta: 0:00:57 lr: 0.000072 grad: 0.1642 (0.1761) loss: 0.6892 (0.6914) time: 0.1533 data: 0.0765 max mem: 9377 +Train: [47] [6000/6250] eta: 0:00:41 lr: 0.000072 grad: 0.1626 (0.1760) loss: 0.6975 (0.6915) time: 0.1284 data: 0.0389 max mem: 9377 +Train: [47] [6100/6250] eta: 0:00:24 lr: 0.000072 grad: 0.1607 (0.1758) loss: 0.7014 (0.6915) time: 0.1547 data: 0.0689 max mem: 9377 +Train: [47] [6200/6250] eta: 0:00:08 lr: 0.000072 grad: 0.1635 (0.1756) loss: 0.6925 (0.6915) time: 0.1393 data: 0.0492 max mem: 9377 +Train: [47] [6249/6250] eta: 0:00:00 lr: 0.000072 grad: 0.1610 (0.1755) loss: 0.6893 (0.6915) time: 0.1968 data: 0.1110 max mem: 9377 +Train: [47] Total time: 0:17:11 (0.1650 s / it) +Averaged stats: lr: 0.000072 grad: 0.1610 (0.1755) loss: 0.6893 (0.6915) +Eval (hcp-train-subset): [47] [ 0/62] eta: 0:03:55 loss: 0.8773 (0.8773) time: 3.7936 data: 3.7218 max mem: 9377 +Eval (hcp-train-subset): [47] [61/62] eta: 0:00:00 loss: 0.8937 (0.8946) time: 0.1280 data: 0.1028 max mem: 9377 +Eval (hcp-train-subset): [47] Total time: 0:00:14 (0.2331 s / it) +Averaged stats (hcp-train-subset): loss: 0.8937 (0.8946) +Eval (hcp-val): [47] [ 0/62] eta: 0:03:42 loss: 0.8968 (0.8968) time: 3.5810 data: 3.4924 max mem: 9377 +Eval (hcp-val): [47] [61/62] eta: 0:00:00 loss: 0.8884 (0.8917) time: 0.1226 data: 0.0932 max mem: 9377 +Eval (hcp-val): [47] Total time: 0:00:15 (0.2420 s / it) +Averaged stats (hcp-val): loss: 0.8884 (0.8917) +Eval (nsd-val): [47] [ 0/62] eta: 0:05:55 loss: 0.8726 (0.8726) time: 5.7358 data: 5.6702 max mem: 9377 +Eval (nsd-val): [47] [61/62] eta: 0:00:00 loss: 0.8783 (0.8797) time: 0.1392 data: 0.1137 max mem: 9377 +Eval (nsd-val): [47] Total time: 0:00:14 (0.2364 s / it) +Averaged stats (nsd-val): loss: 0.8783 (0.8797) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [48] [ 0/6250] eta: 10:18:47 lr: 0.000072 grad: 0.1664 (0.1664) loss: 0.8159 (0.8159) time: 5.9405 data: 5.8037 max mem: 9377 +Train: [48] [ 100/6250] eta: 0:23:12 lr: 0.000072 grad: 0.3971 (0.3526) loss: 0.7036 (0.7285) time: 0.1713 data: 0.0698 max mem: 9377 +Train: [48] [ 200/6250] eta: 0:21:03 lr: 0.000072 grad: 0.2885 (0.3594) loss: 0.6982 (0.7173) time: 0.2027 data: 0.1138 max mem: 9377 +Train: [48] [ 300/6250] eta: 0:19:32 lr: 0.000072 grad: 0.2632 (0.3301) loss: 0.7015 (0.7132) time: 0.1928 data: 0.1027 max mem: 9377 +Train: [48] [ 400/6250] eta: 0:18:26 lr: 0.000072 grad: 0.2158 (0.3143) loss: 0.7073 (0.7095) time: 0.1653 data: 0.0702 max mem: 9377 +Train: [48] [ 500/6250] eta: 0:17:33 lr: 0.000072 grad: 0.2176 (0.2971) loss: 0.6855 (0.7054) time: 0.1631 data: 0.0645 max mem: 9377 +Train: [48] [ 600/6250] eta: 0:17:16 lr: 0.000072 grad: 0.2091 (0.2825) loss: 0.6895 (0.7031) time: 0.1636 data: 0.0758 max mem: 9377 +Train: [48] [ 700/6250] eta: 0:16:46 lr: 0.000072 grad: 0.1819 (0.2696) loss: 0.6961 (0.7018) time: 0.1246 data: 0.0376 max mem: 9377 +Train: [48] [ 800/6250] eta: 0:16:16 lr: 0.000072 grad: 0.1843 (0.2593) loss: 0.6782 (0.7002) time: 0.1535 data: 0.0704 max mem: 9377 +Train: [48] [ 900/6250] eta: 0:15:56 lr: 0.000071 grad: 0.1774 (0.2507) loss: 0.6858 (0.6995) time: 0.1622 data: 0.0680 max mem: 9377 +Train: [48] [1000/6250] eta: 0:15:33 lr: 0.000071 grad: 0.1638 (0.2430) loss: 0.7053 (0.6984) time: 0.1608 data: 0.0681 max mem: 9377 +Train: [48] [1100/6250] eta: 0:15:13 lr: 0.000071 grad: 0.1669 (0.2366) loss: 0.6857 (0.6976) time: 0.1852 data: 0.0910 max mem: 9377 +Train: [48] [1200/6250] eta: 0:14:48 lr: 0.000071 grad: 0.1713 (0.2309) loss: 0.6757 (0.6969) time: 0.1609 data: 0.0668 max mem: 9377 +Train: [48] [1300/6250] eta: 0:14:22 lr: 0.000071 grad: 0.1718 (0.2267) loss: 0.6844 (0.6962) time: 0.1525 data: 0.0536 max mem: 9377 +Train: [48] [1400/6250] eta: 0:13:56 lr: 0.000071 grad: 0.1700 (0.2229) loss: 0.6841 (0.6958) time: 0.1498 data: 0.0579 max mem: 9377 +Train: [48] [1500/6250] eta: 0:13:33 lr: 0.000071 grad: 0.1673 (0.2192) loss: 0.6962 (0.6957) time: 0.1505 data: 0.0474 max mem: 9377 +Train: [48] [1600/6250] eta: 0:13:12 lr: 0.000071 grad: 0.1657 (0.2159) loss: 0.6847 (0.6953) time: 0.1753 data: 0.0877 max mem: 9377 +Train: [48] [1700/6250] eta: 0:12:53 lr: 0.000071 grad: 0.1602 (0.2130) loss: 0.6869 (0.6951) time: 0.1506 data: 0.0626 max mem: 9377 +Train: [48] [1800/6250] eta: 0:12:36 lr: 0.000071 grad: 0.1590 (0.2103) loss: 0.6952 (0.6948) time: 0.1815 data: 0.1032 max mem: 9377 +Train: [48] [1900/6250] eta: 0:12:17 lr: 0.000071 grad: 0.1630 (0.2080) loss: 0.7004 (0.6944) time: 0.1526 data: 0.0585 max mem: 9377 +Train: [48] [2000/6250] eta: 0:12:00 lr: 0.000071 grad: 0.1616 (0.2059) loss: 0.6967 (0.6944) time: 0.1788 data: 0.0876 max mem: 9377 +Train: [48] [2100/6250] eta: 0:11:40 lr: 0.000071 grad: 0.1618 (0.2039) loss: 0.6888 (0.6945) time: 0.1684 data: 0.0865 max mem: 9377 +Train: [48] [2200/6250] eta: 0:11:21 lr: 0.000071 grad: 0.1636 (0.2020) loss: 0.6895 (0.6945) time: 0.1395 data: 0.0532 max mem: 9377 +Train: [48] [2300/6250] eta: 0:11:04 lr: 0.000071 grad: 0.1542 (0.2002) loss: 0.6980 (0.6944) time: 0.1620 data: 0.0689 max mem: 9377 +Train: [48] [2400/6250] eta: 0:10:46 lr: 0.000071 grad: 0.1571 (0.1986) loss: 0.7000 (0.6945) time: 0.1523 data: 0.0630 max mem: 9377 +Train: [48] [2500/6250] eta: 0:10:30 lr: 0.000071 grad: 0.1598 (0.1971) loss: 0.6920 (0.6944) time: 0.1961 data: 0.1107 max mem: 9377 +Train: [48] [2600/6250] eta: 0:10:10 lr: 0.000071 grad: 0.1625 (0.1959) loss: 0.6983 (0.6944) time: 0.1656 data: 0.0669 max mem: 9377 +Train: [48] [2700/6250] eta: 0:09:52 lr: 0.000071 grad: 0.1643 (0.1948) loss: 0.6919 (0.6943) time: 0.1645 data: 0.0681 max mem: 9377 +Train: [48] [2800/6250] eta: 0:09:34 lr: 0.000071 grad: 0.1635 (0.1938) loss: 0.6967 (0.6943) time: 0.1566 data: 0.0694 max mem: 9377 +Train: [48] [2900/6250] eta: 0:09:15 lr: 0.000071 grad: 0.1641 (0.1929) loss: 0.6979 (0.6943) time: 0.1457 data: 0.0537 max mem: 9377 +Train: [48] [3000/6250] eta: 0:08:57 lr: 0.000071 grad: 0.1590 (0.1921) loss: 0.6986 (0.6944) time: 0.1499 data: 0.0540 max mem: 9377 +Train: [48] [3100/6250] eta: 0:08:39 lr: 0.000071 grad: 0.1655 (0.1912) loss: 0.6905 (0.6944) time: 0.1514 data: 0.0637 max mem: 9377 +Train: [48] [3200/6250] eta: 0:08:23 lr: 0.000071 grad: 0.1656 (0.1904) loss: 0.6941 (0.6945) time: 0.1616 data: 0.0761 max mem: 9377 +Train: [48] [3300/6250] eta: 0:08:06 lr: 0.000071 grad: 0.1620 (0.1896) loss: 0.7095 (0.6945) time: 0.1655 data: 0.0701 max mem: 9377 +Train: [48] [3400/6250] eta: 0:07:49 lr: 0.000071 grad: 0.1617 (0.1890) loss: 0.6931 (0.6942) time: 0.1545 data: 0.0648 max mem: 9377 +Train: [48] [3500/6250] eta: 0:07:32 lr: 0.000071 grad: 0.1638 (0.1884) loss: 0.6810 (0.6938) time: 0.1686 data: 0.0880 max mem: 9377 +Train: [48] [3600/6250] eta: 0:07:15 lr: 0.000071 grad: 0.1662 (0.1879) loss: 0.6768 (0.6934) time: 0.1692 data: 0.0822 max mem: 9377 +Train: [48] [3700/6250] eta: 0:06:58 lr: 0.000071 grad: 0.1654 (0.1873) loss: 0.6893 (0.6932) time: 0.1599 data: 0.0732 max mem: 9377 +Train: [48] [3800/6250] eta: 0:06:41 lr: 0.000071 grad: 0.1648 (0.1869) loss: 0.6815 (0.6929) time: 0.1642 data: 0.0778 max mem: 9377 +Train: [48] [3900/6250] eta: 0:06:25 lr: 0.000070 grad: 0.1632 (0.1863) loss: 0.6844 (0.6926) time: 0.1719 data: 0.0869 max mem: 9377 +Train: [48] [4000/6250] eta: 0:06:07 lr: 0.000070 grad: 0.1647 (0.1859) loss: 0.6877 (0.6924) time: 0.1106 data: 0.0171 max mem: 9377 +Train: [48] [4100/6250] eta: 0:05:51 lr: 0.000070 grad: 0.1591 (0.1854) loss: 0.6786 (0.6922) time: 0.1756 data: 0.0894 max mem: 9377 +Train: [48] [4200/6250] eta: 0:05:33 lr: 0.000070 grad: 0.1617 (0.1849) loss: 0.6845 (0.6921) time: 0.1594 data: 0.0576 max mem: 9377 +Train: [48] [4300/6250] eta: 0:05:16 lr: 0.000070 grad: 0.1588 (0.1846) loss: 0.6927 (0.6919) time: 0.1500 data: 0.0594 max mem: 9377 +Train: [48] [4400/6250] eta: 0:05:00 lr: 0.000070 grad: 0.1617 (0.1841) loss: 0.6827 (0.6919) time: 0.1534 data: 0.0676 max mem: 9377 +Train: [48] [4500/6250] eta: 0:04:44 lr: 0.000070 grad: 0.1636 (0.1837) loss: 0.6874 (0.6919) time: 0.1449 data: 0.0546 max mem: 9377 +Train: [48] [4600/6250] eta: 0:04:27 lr: 0.000070 grad: 0.1633 (0.1834) loss: 0.7024 (0.6919) time: 0.1469 data: 0.0676 max mem: 9377 +Train: [48] [4700/6250] eta: 0:04:11 lr: 0.000070 grad: 0.1686 (0.1830) loss: 0.6825 (0.6919) time: 0.1527 data: 0.0593 max mem: 9377 +Train: [48] [4800/6250] eta: 0:03:54 lr: 0.000070 grad: 0.1626 (0.1827) loss: 0.7022 (0.6919) time: 0.1595 data: 0.0782 max mem: 9377 +Train: [48] [4900/6250] eta: 0:03:38 lr: 0.000070 grad: 0.1615 (0.1824) loss: 0.6991 (0.6918) time: 0.1511 data: 0.0620 max mem: 9377 +Train: [48] [5000/6250] eta: 0:03:21 lr: 0.000070 grad: 0.1649 (0.1821) loss: 0.6938 (0.6917) time: 0.1481 data: 0.0614 max mem: 9377 +Train: [48] [5100/6250] eta: 0:03:05 lr: 0.000070 grad: 0.1679 (0.1819) loss: 0.6830 (0.6915) time: 0.1503 data: 0.0693 max mem: 9377 +Train: [48] [5200/6250] eta: 0:02:49 lr: 0.000070 grad: 0.1666 (0.1816) loss: 0.6758 (0.6914) time: 0.1428 data: 0.0592 max mem: 9377 +Train: [48] [5300/6250] eta: 0:02:33 lr: 0.000070 grad: 0.1720 (0.1814) loss: 0.6808 (0.6914) time: 0.1411 data: 0.0543 max mem: 9377 +Train: [48] [5400/6250] eta: 0:02:17 lr: 0.000070 grad: 0.1684 (0.1813) loss: 0.6808 (0.6913) time: 0.1735 data: 0.0786 max mem: 9377 +Train: [48] [5500/6250] eta: 0:02:00 lr: 0.000070 grad: 0.1640 (0.1810) loss: 0.6939 (0.6912) time: 0.1443 data: 0.0595 max mem: 9377 +Train: [48] [5600/6250] eta: 0:01:44 lr: 0.000070 grad: 0.1668 (0.1808) loss: 0.6869 (0.6912) time: 0.1774 data: 0.0926 max mem: 9377 +Train: [48] [5700/6250] eta: 0:01:28 lr: 0.000070 grad: 0.1699 (0.1805) loss: 0.6963 (0.6911) time: 0.1578 data: 0.0644 max mem: 9377 +Train: [48] [5800/6250] eta: 0:01:12 lr: 0.000070 grad: 0.1644 (0.1803) loss: 0.6931 (0.6910) time: 0.1306 data: 0.0356 max mem: 9377 +Train: [48] [5900/6250] eta: 0:00:56 lr: 0.000070 grad: 0.1663 (0.1801) loss: 0.6700 (0.6909) time: 0.1816 data: 0.0979 max mem: 9377 +Train: [48] [6000/6250] eta: 0:00:40 lr: 0.000070 grad: 0.1676 (0.1799) loss: 0.6825 (0.6908) time: 0.1719 data: 0.0820 max mem: 9377 +Train: [48] [6100/6250] eta: 0:00:24 lr: 0.000070 grad: 0.1594 (0.1797) loss: 0.6847 (0.6907) time: 0.1487 data: 0.0573 max mem: 9377 +Train: [48] [6200/6250] eta: 0:00:08 lr: 0.000070 grad: 0.1632 (0.1795) loss: 0.6879 (0.6906) time: 0.1355 data: 0.0490 max mem: 9377 +Train: [48] [6249/6250] eta: 0:00:00 lr: 0.000070 grad: 0.1646 (0.1794) loss: 0.6934 (0.6907) time: 0.1594 data: 0.0655 max mem: 9377 +Train: [48] Total time: 0:16:49 (0.1615 s / it) +Averaged stats: lr: 0.000070 grad: 0.1646 (0.1794) loss: 0.6934 (0.6907) +Eval (hcp-train-subset): [48] [ 0/62] eta: 0:05:02 loss: 0.8783 (0.8783) time: 4.8727 data: 4.8189 max mem: 9377 +Eval (hcp-train-subset): [48] [61/62] eta: 0:00:00 loss: 0.8906 (0.8917) time: 0.1370 data: 0.1103 max mem: 9377 +Eval (hcp-train-subset): [48] Total time: 0:00:15 (0.2424 s / it) +Averaged stats (hcp-train-subset): loss: 0.8906 (0.8917) +Eval (hcp-val): [48] [ 0/62] eta: 0:06:04 loss: 0.8907 (0.8907) time: 5.8864 data: 5.8558 max mem: 9377 +Eval (hcp-val): [48] [61/62] eta: 0:00:00 loss: 0.8900 (0.8911) time: 0.1378 data: 0.1124 max mem: 9377 +Eval (hcp-val): [48] Total time: 0:00:15 (0.2488 s / it) +Averaged stats (hcp-val): loss: 0.8900 (0.8911) +Eval (nsd-val): [48] [ 0/62] eta: 0:03:34 loss: 0.8645 (0.8645) time: 3.4574 data: 3.3863 max mem: 9377 +Eval (nsd-val): [48] [61/62] eta: 0:00:00 loss: 0.8742 (0.8758) time: 0.1573 data: 0.1320 max mem: 9377 +Eval (nsd-val): [48] Total time: 0:00:15 (0.2429 s / it) +Averaged stats (nsd-val): loss: 0.8742 (0.8758) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [49] [ 0/6250] eta: 11:08:02 lr: 0.000070 grad: 0.1998 (0.1998) loss: 0.7724 (0.7724) time: 6.4133 data: 6.3120 max mem: 9377 +Train: [49] [ 100/6250] eta: 0:23:28 lr: 0.000070 grad: 0.2806 (0.3507) loss: 0.7171 (0.7209) time: 0.1754 data: 0.0701 max mem: 9377 +Train: [49] [ 200/6250] eta: 0:20:25 lr: 0.000070 grad: 0.3216 (0.3370) loss: 0.6795 (0.7062) time: 0.1788 data: 0.0824 max mem: 9377 +Train: [49] [ 300/6250] eta: 0:18:53 lr: 0.000070 grad: 0.2779 (0.3274) loss: 0.6569 (0.6983) time: 0.1552 data: 0.0509 max mem: 9377 +Train: [49] [ 400/6250] eta: 0:18:03 lr: 0.000070 grad: 0.2505 (0.3110) loss: 0.6575 (0.6943) time: 0.1569 data: 0.0535 max mem: 9377 +Train: [49] [ 500/6250] eta: 0:17:23 lr: 0.000070 grad: 0.2126 (0.2934) loss: 0.6601 (0.6909) time: 0.1488 data: 0.0532 max mem: 9377 +Train: [49] [ 600/6250] eta: 0:16:54 lr: 0.000070 grad: 0.1880 (0.2782) loss: 0.6665 (0.6885) time: 0.1491 data: 0.0627 max mem: 9377 +Train: [49] [ 700/6250] eta: 0:16:32 lr: 0.000069 grad: 0.1735 (0.2644) loss: 0.6736 (0.6870) time: 0.1810 data: 0.0924 max mem: 9377 +Train: [49] [ 800/6250] eta: 0:16:14 lr: 0.000069 grad: 0.1702 (0.2531) loss: 0.6872 (0.6864) time: 0.1762 data: 0.0906 max mem: 9377 +Train: [49] [ 900/6250] eta: 0:15:45 lr: 0.000069 grad: 0.1692 (0.2442) loss: 0.6861 (0.6861) time: 0.1573 data: 0.0654 max mem: 9377 +Train: [49] [1000/6250] eta: 0:15:25 lr: 0.000069 grad: 0.1656 (0.2370) loss: 0.6849 (0.6860) time: 0.1720 data: 0.0839 max mem: 9377 +Train: [49] [1100/6250] eta: 0:15:03 lr: 0.000069 grad: 0.1686 (0.2309) loss: 0.6770 (0.6859) time: 0.1629 data: 0.0719 max mem: 9377 +Train: [49] [1200/6250] eta: 0:14:38 lr: 0.000069 grad: 0.1676 (0.2257) loss: 0.6855 (0.6859) time: 0.1525 data: 0.0443 max mem: 9377 +Train: [49] [1300/6250] eta: 0:14:14 lr: 0.000069 grad: 0.1693 (0.2213) loss: 0.6891 (0.6859) time: 0.1379 data: 0.0494 max mem: 9377 +Train: [49] [1400/6250] eta: 0:13:48 lr: 0.000069 grad: 0.1699 (0.2176) loss: 0.6815 (0.6860) time: 0.1420 data: 0.0498 max mem: 9377 +Train: [49] [1500/6250] eta: 0:13:29 lr: 0.000069 grad: 0.1685 (0.2143) loss: 0.6940 (0.6860) time: 0.1624 data: 0.0768 max mem: 9377 +Train: [49] [1600/6250] eta: 0:13:07 lr: 0.000069 grad: 0.1698 (0.2117) loss: 0.6754 (0.6859) time: 0.1797 data: 0.1041 max mem: 9377 +Train: [49] [1700/6250] eta: 0:12:47 lr: 0.000069 grad: 0.1688 (0.2091) loss: 0.6792 (0.6862) time: 0.1649 data: 0.0790 max mem: 9377 +Train: [49] [1800/6250] eta: 0:12:27 lr: 0.000069 grad: 0.1647 (0.2069) loss: 0.6817 (0.6862) time: 0.1451 data: 0.0569 max mem: 9377 +Train: [49] [1900/6250] eta: 0:12:12 lr: 0.000069 grad: 0.1597 (0.2050) loss: 0.6959 (0.6863) time: 0.1773 data: 0.0885 max mem: 9377 +Train: [49] [2000/6250] eta: 0:11:55 lr: 0.000069 grad: 0.1696 (0.2032) loss: 0.6957 (0.6863) time: 0.1735 data: 0.0817 max mem: 9377 +Train: [49] [2100/6250] eta: 0:11:39 lr: 0.000069 grad: 0.1596 (0.2016) loss: 0.6808 (0.6863) time: 0.1620 data: 0.0762 max mem: 9377 +Train: [49] [2200/6250] eta: 0:11:20 lr: 0.000069 grad: 0.1664 (0.2002) loss: 0.6860 (0.6861) time: 0.1418 data: 0.0582 max mem: 9377 +Train: [49] [2300/6250] eta: 0:11:04 lr: 0.000069 grad: 0.1647 (0.1988) loss: 0.6872 (0.6861) time: 0.1674 data: 0.0736 max mem: 9377 +Train: [49] [2400/6250] eta: 0:10:47 lr: 0.000069 grad: 0.1781 (0.1976) loss: 0.6799 (0.6859) time: 0.1584 data: 0.0677 max mem: 9377 +Train: [49] [2500/6250] eta: 0:10:28 lr: 0.000069 grad: 0.1680 (0.1966) loss: 0.6618 (0.6857) time: 0.1508 data: 0.0512 max mem: 9377 +Train: [49] [2600/6250] eta: 0:10:08 lr: 0.000069 grad: 0.1620 (0.1956) loss: 0.6963 (0.6856) time: 0.1436 data: 0.0506 max mem: 9377 +Train: [49] [2700/6250] eta: 0:09:49 lr: 0.000069 grad: 0.1691 (0.1946) loss: 0.6803 (0.6856) time: 0.1442 data: 0.0566 max mem: 9377 +Train: [49] [2800/6250] eta: 0:09:30 lr: 0.000069 grad: 0.1736 (0.1938) loss: 0.6785 (0.6854) time: 0.1385 data: 0.0506 max mem: 9377 +Train: [49] [2900/6250] eta: 0:09:12 lr: 0.000069 grad: 0.1683 (0.1930) loss: 0.6723 (0.6852) time: 0.1506 data: 0.0524 max mem: 9377 +Train: [49] [3000/6250] eta: 0:08:55 lr: 0.000069 grad: 0.1749 (0.1924) loss: 0.6904 (0.6851) time: 0.1741 data: 0.0878 max mem: 9377 +Train: [49] [3100/6250] eta: 0:08:38 lr: 0.000069 grad: 0.1699 (0.1917) loss: 0.6899 (0.6850) time: 0.1701 data: 0.0858 max mem: 9377 +Train: [49] [3200/6250] eta: 0:08:22 lr: 0.000069 grad: 0.1698 (0.1910) loss: 0.6843 (0.6848) time: 0.1543 data: 0.0670 max mem: 9377 +Train: [49] [3300/6250] eta: 0:08:05 lr: 0.000069 grad: 0.1658 (0.1904) loss: 0.6723 (0.6846) time: 0.1570 data: 0.0719 max mem: 9377 +Train: [49] [3400/6250] eta: 0:07:49 lr: 0.000069 grad: 0.1696 (0.1898) loss: 0.6704 (0.6844) time: 0.1608 data: 0.0755 max mem: 9377 +Train: [49] [3500/6250] eta: 0:07:32 lr: 0.000069 grad: 0.1729 (0.1893) loss: 0.6667 (0.6843) time: 0.1469 data: 0.0562 max mem: 9377 +Train: [49] [3600/6250] eta: 0:07:14 lr: 0.000069 grad: 0.1700 (0.1887) loss: 0.6872 (0.6843) time: 0.1569 data: 0.0679 max mem: 9377 +Train: [49] [3700/6250] eta: 0:06:58 lr: 0.000069 grad: 0.1677 (0.1882) loss: 0.6774 (0.6841) time: 0.1662 data: 0.0690 max mem: 9377 +Train: [49] [3800/6250] eta: 0:06:42 lr: 0.000068 grad: 0.1659 (0.1877) loss: 0.6846 (0.6842) time: 0.1655 data: 0.0590 max mem: 9377 +Train: [49] [3900/6250] eta: 0:06:26 lr: 0.000068 grad: 0.1627 (0.1872) loss: 0.6901 (0.6842) time: 0.1760 data: 0.0755 max mem: 9377 +Train: [49] [4000/6250] eta: 0:06:09 lr: 0.000068 grad: 0.1663 (0.1867) loss: 0.6763 (0.6842) time: 0.1642 data: 0.0645 max mem: 9377 +Train: [49] [4100/6250] eta: 0:05:52 lr: 0.000068 grad: 0.1640 (0.1862) loss: 0.6919 (0.6843) time: 0.1218 data: 0.0315 max mem: 9377 +Train: [49] [4200/6250] eta: 0:05:35 lr: 0.000068 grad: 0.1652 (0.1858) loss: 0.6810 (0.6843) time: 0.1412 data: 0.0523 max mem: 9377 +Train: [49] [4300/6250] eta: 0:05:18 lr: 0.000068 grad: 0.1646 (0.1854) loss: 0.6780 (0.6843) time: 0.1292 data: 0.0344 max mem: 9377 +Train: [49] [4400/6250] eta: 0:05:02 lr: 0.000068 grad: 0.1721 (0.1850) loss: 0.6651 (0.6842) time: 0.1657 data: 0.0809 max mem: 9377 +Train: [49] [4500/6250] eta: 0:04:46 lr: 0.000068 grad: 0.1672 (0.1847) loss: 0.6885 (0.6842) time: 0.1613 data: 0.0724 max mem: 9377 +Train: [49] [4600/6250] eta: 0:04:30 lr: 0.000068 grad: 0.1591 (0.1843) loss: 0.6957 (0.6842) time: 0.1515 data: 0.0681 max mem: 9377 +Train: [49] [4700/6250] eta: 0:04:13 lr: 0.000068 grad: 0.1615 (0.1839) loss: 0.6750 (0.6841) time: 0.1827 data: 0.0993 max mem: 9377 +Train: [49] [4800/6250] eta: 0:03:56 lr: 0.000068 grad: 0.1686 (0.1836) loss: 0.6808 (0.6841) time: 0.1525 data: 0.0586 max mem: 9377 +Train: [49] [4900/6250] eta: 0:03:40 lr: 0.000068 grad: 0.1602 (0.1832) loss: 0.6880 (0.6841) time: 0.1390 data: 0.0532 max mem: 9377 +Train: [49] [5000/6250] eta: 0:03:23 lr: 0.000068 grad: 0.1757 (0.1829) loss: 0.6697 (0.6840) time: 0.1715 data: 0.0823 max mem: 9377 +Train: [49] [5100/6250] eta: 0:03:07 lr: 0.000068 grad: 0.1665 (0.1827) loss: 0.6704 (0.6838) time: 0.1542 data: 0.0499 max mem: 9377 +Train: [49] [5200/6250] eta: 0:02:51 lr: 0.000068 grad: 0.1657 (0.1824) loss: 0.6812 (0.6838) time: 0.1598 data: 0.0695 max mem: 9377 +Train: [49] [5300/6250] eta: 0:02:34 lr: 0.000068 grad: 0.1755 (0.1822) loss: 0.6688 (0.6837) time: 0.1728 data: 0.0900 max mem: 9377 +Train: [49] [5400/6250] eta: 0:02:18 lr: 0.000068 grad: 0.1646 (0.1819) loss: 0.6929 (0.6837) time: 0.1615 data: 0.0781 max mem: 9377 +Train: [49] [5500/6250] eta: 0:02:02 lr: 0.000068 grad: 0.1701 (0.1816) loss: 0.6802 (0.6837) time: 0.1728 data: 0.0878 max mem: 9377 +Train: [49] [5600/6250] eta: 0:01:45 lr: 0.000068 grad: 0.1664 (0.1814) loss: 0.6954 (0.6838) time: 0.1826 data: 0.1029 max mem: 9377 +Train: [49] [5700/6250] eta: 0:01:29 lr: 0.000068 grad: 0.1633 (0.1811) loss: 0.6931 (0.6840) time: 0.1515 data: 0.0562 max mem: 9377 +Train: [49] [5800/6250] eta: 0:01:13 lr: 0.000068 grad: 0.1710 (0.1810) loss: 0.6912 (0.6841) time: 0.1816 data: 0.0897 max mem: 9377 +Train: [49] [5900/6250] eta: 0:00:56 lr: 0.000068 grad: 0.1730 (0.1808) loss: 0.6799 (0.6842) time: 0.1304 data: 0.0362 max mem: 9377 +Train: [49] [6000/6250] eta: 0:00:40 lr: 0.000068 grad: 0.1693 (0.1806) loss: 0.6927 (0.6843) time: 0.1710 data: 0.0912 max mem: 9377 +Train: [49] [6100/6250] eta: 0:00:24 lr: 0.000068 grad: 0.1662 (0.1804) loss: 0.6852 (0.6844) time: 0.1429 data: 0.0618 max mem: 9377 +Train: [49] [6200/6250] eta: 0:00:08 lr: 0.000068 grad: 0.1701 (0.1802) loss: 0.6906 (0.6845) time: 0.1350 data: 0.0479 max mem: 9377 +Train: [49] [6249/6250] eta: 0:00:00 lr: 0.000068 grad: 0.1732 (0.1801) loss: 0.6919 (0.6846) time: 0.1355 data: 0.0539 max mem: 9377 +Train: [49] Total time: 0:16:59 (0.1631 s / it) +Averaged stats: lr: 0.000068 grad: 0.1732 (0.1801) loss: 0.6919 (0.6846) +Eval (hcp-train-subset): [49] [ 0/62] eta: 0:04:53 loss: 0.8871 (0.8871) time: 4.7404 data: 4.6488 max mem: 9377 +Eval (hcp-train-subset): [49] [61/62] eta: 0:00:00 loss: 0.8922 (0.8938) time: 0.1412 data: 0.1161 max mem: 9377 +Eval (hcp-train-subset): [49] Total time: 0:00:14 (0.2380 s / it) +Averaged stats (hcp-train-subset): loss: 0.8922 (0.8938) +Making plots (hcp-train-subset): example=38 +Eval (hcp-val): [49] [ 0/62] eta: 0:05:34 loss: 0.8988 (0.8988) time: 5.3992 data: 5.3611 max mem: 9377 +Eval (hcp-val): [49] [61/62] eta: 0:00:00 loss: 0.8927 (0.8941) time: 0.1348 data: 0.1096 max mem: 9377 +Eval (hcp-val): [49] Total time: 0:00:15 (0.2420 s / it) +Averaged stats (hcp-val): loss: 0.8927 (0.8941) +Making plots (hcp-val): example=14 +Eval (nsd-val): [49] [ 0/62] eta: 0:05:32 loss: 0.8612 (0.8612) time: 5.3557 data: 5.3241 max mem: 9377 +Eval (nsd-val): [49] [61/62] eta: 0:00:00 loss: 0.8748 (0.8783) time: 0.1474 data: 0.1220 max mem: 9377 +Eval (nsd-val): [49] Total time: 0:00:14 (0.2342 s / it) +Averaged stats (nsd-val): loss: 0.8748 (0.8783) +Making plots (nsd-val): example=22 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-00049.pth +Train: [50] [ 0/6250] eta: 11:46:05 lr: 0.000068 grad: 0.2097 (0.2097) loss: 0.7539 (0.7539) time: 6.7786 data: 6.6567 max mem: 9377 +Train: [50] [ 100/6250] eta: 0:23:14 lr: 0.000068 grad: 0.2370 (0.3287) loss: 0.7522 (0.7136) time: 0.1472 data: 0.0546 max mem: 9377 +Train: [50] [ 200/6250] eta: 0:19:57 lr: 0.000068 grad: 0.1938 (0.2714) loss: 0.7192 (0.7119) time: 0.1824 data: 0.0838 max mem: 9377 +Train: [50] [ 300/6250] eta: 0:18:41 lr: 0.000068 grad: 0.1910 (0.2476) loss: 0.6947 (0.7098) time: 0.1946 data: 0.0998 max mem: 9377 +Train: [50] [ 400/6250] eta: 0:17:36 lr: 0.000068 grad: 0.1907 (0.2357) loss: 0.6915 (0.7057) time: 0.1492 data: 0.0430 max mem: 9377 +Train: [50] [ 500/6250] eta: 0:17:01 lr: 0.000067 grad: 0.1762 (0.2250) loss: 0.6801 (0.7033) time: 0.1793 data: 0.0974 max mem: 9377 +Train: [50] [ 600/6250] eta: 0:16:50 lr: 0.000067 grad: 0.1720 (0.2176) loss: 0.6918 (0.7016) time: 0.1729 data: 0.0751 max mem: 9377 +Train: [50] [ 700/6250] eta: 0:16:25 lr: 0.000067 grad: 0.1697 (0.2118) loss: 0.6971 (0.6996) time: 0.1643 data: 0.0697 max mem: 9377 +Train: [50] [ 800/6250] eta: 0:16:06 lr: 0.000067 grad: 0.1615 (0.2067) loss: 0.7095 (0.6993) time: 0.1729 data: 0.0868 max mem: 9377 +Train: [50] [ 900/6250] eta: 0:15:41 lr: 0.000067 grad: 0.1648 (0.2024) loss: 0.7072 (0.6990) time: 0.1617 data: 0.0821 max mem: 9377 +Train: [50] [1000/6250] eta: 0:15:15 lr: 0.000067 grad: 0.1648 (0.1990) loss: 0.6913 (0.6990) time: 0.1849 data: 0.0871 max mem: 9377 +Train: [50] [1100/6250] eta: 0:14:55 lr: 0.000067 grad: 0.1673 (0.1961) loss: 0.6952 (0.6987) time: 0.1569 data: 0.0649 max mem: 9377 +Train: [50] [1200/6250] eta: 0:14:32 lr: 0.000067 grad: 0.1658 (0.1938) loss: 0.6948 (0.6986) time: 0.1645 data: 0.0661 max mem: 9377 +Train: [50] [1300/6250] eta: 0:14:12 lr: 0.000067 grad: 0.1715 (0.1922) loss: 0.6954 (0.6981) time: 0.1536 data: 0.0569 max mem: 9377 +Train: [50] [1400/6250] eta: 0:13:57 lr: 0.000067 grad: 0.1652 (0.1905) loss: 0.6869 (0.6978) time: 0.2185 data: 0.1422 max mem: 9377 +Train: [50] [1500/6250] eta: 0:13:32 lr: 0.000067 grad: 0.1658 (0.1890) loss: 0.6916 (0.6976) time: 0.1539 data: 0.0624 max mem: 9377 +Train: [50] [1600/6250] eta: 0:13:13 lr: 0.000067 grad: 0.1664 (0.1876) loss: 0.6855 (0.6970) time: 0.1830 data: 0.0903 max mem: 9377 +Train: [50] [1700/6250] eta: 0:12:51 lr: 0.000067 grad: 0.1720 (0.1866) loss: 0.6802 (0.6966) time: 0.1696 data: 0.0903 max mem: 9377 +Train: [50] [1800/6250] eta: 0:12:36 lr: 0.000067 grad: 0.1716 (0.1858) loss: 0.6803 (0.6962) time: 0.1711 data: 0.0863 max mem: 9377 +Train: [50] [1900/6250] eta: 0:12:20 lr: 0.000067 grad: 0.1674 (0.1851) loss: 0.6848 (0.6957) time: 0.1657 data: 0.0683 max mem: 9377 +Train: [50] [2000/6250] eta: 0:12:01 lr: 0.000067 grad: 0.1679 (0.1845) loss: 0.6720 (0.6951) time: 0.1640 data: 0.0787 max mem: 9377 +Train: [50] [2100/6250] eta: 0:11:41 lr: 0.000067 grad: 0.1686 (0.1840) loss: 0.6735 (0.6943) time: 0.1508 data: 0.0661 max mem: 9377 +Train: [50] [2200/6250] eta: 0:11:20 lr: 0.000067 grad: 0.1723 (0.1835) loss: 0.6872 (0.6940) time: 0.1583 data: 0.0653 max mem: 9377 +Train: [50] [2300/6250] eta: 0:11:01 lr: 0.000067 grad: 0.1693 (0.1830) loss: 0.6820 (0.6936) time: 0.1340 data: 0.0424 max mem: 9377 +Train: [50] [2400/6250] eta: 0:10:43 lr: 0.000067 grad: 0.1698 (0.1826) loss: 0.6877 (0.6934) time: 0.1604 data: 0.0676 max mem: 9377 +Train: [50] [2500/6250] eta: 0:10:24 lr: 0.000067 grad: 0.1691 (0.1822) loss: 0.6827 (0.6928) time: 0.1573 data: 0.0609 max mem: 9377 +Train: [50] [2600/6250] eta: 0:10:05 lr: 0.000067 grad: 0.1717 (0.1817) loss: 0.6755 (0.6924) time: 0.1354 data: 0.0419 max mem: 9377 +Train: [50] [2700/6250] eta: 0:09:47 lr: 0.000067 grad: 0.1676 (0.1814) loss: 0.6790 (0.6920) time: 0.1621 data: 0.0754 max mem: 9377 +Train: [50] [2800/6250] eta: 0:09:28 lr: 0.000067 grad: 0.1681 (0.1811) loss: 0.6807 (0.6915) time: 0.1461 data: 0.0491 max mem: 9377 +Train: [50] [2900/6250] eta: 0:09:08 lr: 0.000067 grad: 0.1678 (0.1807) loss: 0.6770 (0.6914) time: 0.1066 data: 0.0125 max mem: 9377 +Train: [50] [3000/6250] eta: 0:08:50 lr: 0.000067 grad: 0.1615 (0.1802) loss: 0.6739 (0.6910) time: 0.1530 data: 0.0608 max mem: 9377 +Train: [50] [3100/6250] eta: 0:08:33 lr: 0.000067 grad: 0.1684 (0.1800) loss: 0.6774 (0.6907) time: 0.1617 data: 0.0684 max mem: 9377 +Train: [50] [3200/6250] eta: 0:08:16 lr: 0.000067 grad: 0.1641 (0.1797) loss: 0.6798 (0.6904) time: 0.1771 data: 0.0887 max mem: 9377 +Train: [50] [3300/6250] eta: 0:08:01 lr: 0.000067 grad: 0.1701 (0.1795) loss: 0.6723 (0.6899) time: 0.1491 data: 0.0675 max mem: 9377 +Train: [50] [3400/6250] eta: 0:07:44 lr: 0.000067 grad: 0.1659 (0.1793) loss: 0.6821 (0.6897) time: 0.1507 data: 0.0580 max mem: 9377 +Train: [50] [3500/6250] eta: 0:07:27 lr: 0.000067 grad: 0.1682 (0.1790) loss: 0.6782 (0.6896) time: 0.1553 data: 0.0693 max mem: 9377 +Train: [50] [3600/6250] eta: 0:07:10 lr: 0.000066 grad: 0.1672 (0.1788) loss: 0.6800 (0.6894) time: 0.1496 data: 0.0637 max mem: 9377 +Train: [50] [3700/6250] eta: 0:06:53 lr: 0.000066 grad: 0.1616 (0.1785) loss: 0.6940 (0.6893) time: 0.1495 data: 0.0564 max mem: 9377 +Train: [50] [3800/6250] eta: 0:06:38 lr: 0.000066 grad: 0.1685 (0.1782) loss: 0.6865 (0.6892) time: 0.1782 data: 0.0883 max mem: 9377 +Train: [50] [3900/6250] eta: 0:06:21 lr: 0.000066 grad: 0.1695 (0.1780) loss: 0.6744 (0.6891) time: 0.1278 data: 0.0420 max mem: 9377 +Train: [50] [4000/6250] eta: 0:06:05 lr: 0.000066 grad: 0.1672 (0.1778) loss: 0.6766 (0.6890) time: 0.1618 data: 0.0759 max mem: 9377 +Train: [50] [4100/6250] eta: 0:05:48 lr: 0.000066 grad: 0.1656 (0.1776) loss: 0.6783 (0.6889) time: 0.1603 data: 0.0824 max mem: 9377 +Train: [50] [4200/6250] eta: 0:05:31 lr: 0.000066 grad: 0.1719 (0.1776) loss: 0.6875 (0.6888) time: 0.1636 data: 0.0672 max mem: 9377 +Train: [50] [4300/6250] eta: 0:05:15 lr: 0.000066 grad: 0.1649 (0.1773) loss: 0.6893 (0.6888) time: 0.1442 data: 0.0518 max mem: 9377 +Train: [50] [4400/6250] eta: 0:04:58 lr: 0.000066 grad: 0.1722 (0.1773) loss: 0.6716 (0.6887) time: 0.1504 data: 0.0570 max mem: 9377 +Train: [50] [4500/6250] eta: 0:04:42 lr: 0.000066 grad: 0.1694 (0.1771) loss: 0.6809 (0.6886) time: 0.1785 data: 0.0884 max mem: 9377 +Train: [50] [4600/6250] eta: 0:04:26 lr: 0.000066 grad: 0.1742 (0.1770) loss: 0.6730 (0.6884) time: 0.1376 data: 0.0460 max mem: 9377 +Train: [50] [4700/6250] eta: 0:04:09 lr: 0.000066 grad: 0.1726 (0.1769) loss: 0.6715 (0.6882) time: 0.1261 data: 0.0375 max mem: 9377 +Train: [50] [4800/6250] eta: 0:03:53 lr: 0.000066 grad: 0.1736 (0.1768) loss: 0.6770 (0.6880) time: 0.1718 data: 0.0907 max mem: 9377 +Train: [50] [4900/6250] eta: 0:03:36 lr: 0.000066 grad: 0.1739 (0.1767) loss: 0.6879 (0.6879) time: 0.1457 data: 0.0561 max mem: 9377 +Train: [50] [5000/6250] eta: 0:03:20 lr: 0.000066 grad: 0.1672 (0.1766) loss: 0.6856 (0.6878) time: 0.1717 data: 0.0893 max mem: 9377 +Train: [50] [5100/6250] eta: 0:03:04 lr: 0.000066 grad: 0.1698 (0.1765) loss: 0.6759 (0.6878) time: 0.1599 data: 0.0705 max mem: 9377 +Train: [50] [5200/6250] eta: 0:02:48 lr: 0.000066 grad: 0.1647 (0.1763) loss: 0.6867 (0.6878) time: 0.1409 data: 0.0492 max mem: 9377 +Train: [50] [5300/6250] eta: 0:02:32 lr: 0.000066 grad: 0.1705 (0.1762) loss: 0.6984 (0.6878) time: 0.1487 data: 0.0578 max mem: 9377 +Train: [50] [5400/6250] eta: 0:02:16 lr: 0.000066 grad: 0.1667 (0.1761) loss: 0.6876 (0.6878) time: 0.1601 data: 0.0725 max mem: 9377 +Train: [50] [5500/6250] eta: 0:01:59 lr: 0.000066 grad: 0.1746 (0.1761) loss: 0.6866 (0.6877) time: 0.1418 data: 0.0569 max mem: 9377 +Train: [50] [5600/6250] eta: 0:01:43 lr: 0.000066 grad: 0.1670 (0.1760) loss: 0.6910 (0.6877) time: 0.1194 data: 0.0223 max mem: 9377 +Train: [50] [5700/6250] eta: 0:01:27 lr: 0.000066 grad: 0.1647 (0.1759) loss: 0.6957 (0.6877) time: 0.1607 data: 0.0734 max mem: 9377 +Train: [50] [5800/6250] eta: 0:01:11 lr: 0.000066 grad: 0.1725 (0.1758) loss: 0.6817 (0.6878) time: 0.1495 data: 0.0646 max mem: 9377 +Train: [50] [5900/6250] eta: 0:00:55 lr: 0.000066 grad: 0.1728 (0.1757) loss: 0.6939 (0.6878) time: 0.1562 data: 0.0640 max mem: 9377 +Train: [50] [6000/6250] eta: 0:00:39 lr: 0.000066 grad: 0.1660 (0.1756) loss: 0.6885 (0.6879) time: 0.1558 data: 0.0728 max mem: 9377 +Train: [50] [6100/6250] eta: 0:00:23 lr: 0.000066 grad: 0.1680 (0.1755) loss: 0.6880 (0.6879) time: 0.1531 data: 0.0633 max mem: 9377 +Train: [50] [6200/6250] eta: 0:00:07 lr: 0.000066 grad: 0.1673 (0.1754) loss: 0.6852 (0.6878) time: 0.1595 data: 0.0736 max mem: 9377 +Train: [50] [6249/6250] eta: 0:00:00 lr: 0.000066 grad: 0.1729 (0.1754) loss: 0.6795 (0.6878) time: 0.1882 data: 0.1025 max mem: 9377 +Train: [50] Total time: 0:16:41 (0.1603 s / it) +Averaged stats: lr: 0.000066 grad: 0.1729 (0.1754) loss: 0.6795 (0.6878) +Eval (hcp-train-subset): [50] [ 0/62] eta: 0:06:18 loss: 0.8827 (0.8827) time: 6.1035 data: 6.0729 max mem: 9377 +Eval (hcp-train-subset): [50] [61/62] eta: 0:00:00 loss: 0.8899 (0.8922) time: 0.1479 data: 0.1228 max mem: 9377 +Eval (hcp-train-subset): [50] Total time: 0:00:14 (0.2390 s / it) +Averaged stats (hcp-train-subset): loss: 0.8899 (0.8922) +Eval (hcp-val): [50] [ 0/62] eta: 0:06:11 loss: 0.8923 (0.8923) time: 5.9921 data: 5.9609 max mem: 9377 +Eval (hcp-val): [50] [61/62] eta: 0:00:00 loss: 0.8951 (0.8934) time: 0.1290 data: 0.1040 max mem: 9377 +Eval (hcp-val): [50] Total time: 0:00:14 (0.2404 s / it) +Averaged stats (hcp-val): loss: 0.8951 (0.8934) +Eval (nsd-val): [50] [ 0/62] eta: 0:04:31 loss: 0.8621 (0.8621) time: 4.3774 data: 4.3020 max mem: 9377 +Eval (nsd-val): [50] [61/62] eta: 0:00:00 loss: 0.8784 (0.8780) time: 0.1260 data: 0.1005 max mem: 9377 +Eval (nsd-val): [50] Total time: 0:00:14 (0.2333 s / it) +Averaged stats (nsd-val): loss: 0.8784 (0.8780) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [51] [ 0/6250] eta: 10:02:53 lr: 0.000066 grad: 0.2644 (0.2644) loss: 0.6011 (0.6011) time: 5.7877 data: 5.5694 max mem: 9377 +Train: [51] [ 100/6250] eta: 0:22:51 lr: 0.000066 grad: 0.2931 (0.3325) loss: 0.6741 (0.6961) time: 0.1738 data: 0.0632 max mem: 9377 +Train: [51] [ 200/6250] eta: 0:19:41 lr: 0.000066 grad: 0.2439 (0.2997) loss: 0.6706 (0.6916) time: 0.1741 data: 0.0750 max mem: 9377 +Train: [51] [ 300/6250] eta: 0:18:31 lr: 0.000065 grad: 0.2096 (0.2757) loss: 0.6921 (0.6913) time: 0.1667 data: 0.0749 max mem: 9377 +Train: [51] [ 400/6250] eta: 0:17:34 lr: 0.000065 grad: 0.2029 (0.2615) loss: 0.6870 (0.6900) time: 0.1500 data: 0.0438 max mem: 9377 +Train: [51] [ 500/6250] eta: 0:16:54 lr: 0.000065 grad: 0.1857 (0.2488) loss: 0.6762 (0.6888) time: 0.1546 data: 0.0511 max mem: 9377 +Train: [51] [ 600/6250] eta: 0:16:16 lr: 0.000065 grad: 0.1977 (0.2400) loss: 0.6754 (0.6865) time: 0.1296 data: 0.0355 max mem: 9377 +Train: [51] [ 700/6250] eta: 0:16:00 lr: 0.000065 grad: 0.1715 (0.2322) loss: 0.6774 (0.6853) time: 0.1541 data: 0.0653 max mem: 9377 +Train: [51] [ 800/6250] eta: 0:15:40 lr: 0.000065 grad: 0.1837 (0.2255) loss: 0.6721 (0.6852) time: 0.1716 data: 0.0787 max mem: 9377 +Train: [51] [ 900/6250] eta: 0:15:16 lr: 0.000065 grad: 0.1754 (0.2201) loss: 0.6952 (0.6858) time: 0.1388 data: 0.0416 max mem: 9377 +Train: [51] [1000/6250] eta: 0:14:53 lr: 0.000065 grad: 0.1629 (0.2152) loss: 0.6896 (0.6862) time: 0.1715 data: 0.0894 max mem: 9377 +Train: [51] [1100/6250] eta: 0:14:32 lr: 0.000065 grad: 0.1757 (0.2115) loss: 0.6882 (0.6863) time: 0.1731 data: 0.0905 max mem: 9377 +Train: [51] [1200/6250] eta: 0:14:04 lr: 0.000065 grad: 0.1740 (0.2082) loss: 0.6804 (0.6864) time: 0.1547 data: 0.0587 max mem: 9377 +Train: [51] [1300/6250] eta: 0:13:40 lr: 0.000065 grad: 0.1739 (0.2056) loss: 0.6756 (0.6859) time: 0.1462 data: 0.0484 max mem: 9377 +Train: [51] [1400/6250] eta: 0:13:17 lr: 0.000065 grad: 0.1698 (0.2035) loss: 0.6864 (0.6854) time: 0.1578 data: 0.0701 max mem: 9377 +Train: [51] [1500/6250] eta: 0:12:57 lr: 0.000065 grad: 0.1675 (0.2014) loss: 0.6754 (0.6851) time: 0.1508 data: 0.0612 max mem: 9377 +Train: [51] [1600/6250] eta: 0:12:38 lr: 0.000065 grad: 0.1665 (0.1996) loss: 0.6773 (0.6851) time: 0.1506 data: 0.0659 max mem: 9377 +Train: [51] [1700/6250] eta: 0:12:22 lr: 0.000065 grad: 0.1691 (0.1981) loss: 0.6720 (0.6845) time: 0.1621 data: 0.0803 max mem: 9377 +Train: [51] [1800/6250] eta: 0:12:05 lr: 0.000065 grad: 0.1699 (0.1969) loss: 0.6846 (0.6844) time: 0.1299 data: 0.0339 max mem: 9377 +Train: [51] [1900/6250] eta: 0:11:47 lr: 0.000065 grad: 0.1710 (0.1956) loss: 0.6844 (0.6843) time: 0.1403 data: 0.0581 max mem: 9377 +Train: [51] [2000/6250] eta: 0:11:29 lr: 0.000065 grad: 0.1720 (0.1944) loss: 0.6942 (0.6844) time: 0.1367 data: 0.0525 max mem: 9377 +Train: [51] [2100/6250] eta: 0:11:12 lr: 0.000065 grad: 0.1723 (0.1935) loss: 0.6842 (0.6844) time: 0.1306 data: 0.0452 max mem: 9377 +Train: [51] [2200/6250] eta: 0:10:54 lr: 0.000065 grad: 0.1772 (0.1927) loss: 0.6802 (0.6841) time: 0.1489 data: 0.0716 max mem: 9377 +Train: [51] [2300/6250] eta: 0:10:40 lr: 0.000065 grad: 0.1847 (0.1920) loss: 0.6686 (0.6838) time: 0.1884 data: 0.0940 max mem: 9377 +Train: [51] [2400/6250] eta: 0:10:27 lr: 0.000065 grad: 0.1713 (0.1915) loss: 0.6721 (0.6836) time: 0.1948 data: 0.0970 max mem: 9377 +Train: [51] [2500/6250] eta: 0:10:10 lr: 0.000065 grad: 0.1724 (0.1907) loss: 0.6753 (0.6836) time: 0.1518 data: 0.0684 max mem: 9377 +Train: [51] [2600/6250] eta: 0:09:53 lr: 0.000065 grad: 0.1760 (0.1901) loss: 0.6785 (0.6835) time: 0.1560 data: 0.0702 max mem: 9377 +Train: [51] [2700/6250] eta: 0:09:37 lr: 0.000065 grad: 0.1769 (0.1897) loss: 0.6724 (0.6834) time: 0.2254 data: 0.1386 max mem: 9377 +Train: [51] [2800/6250] eta: 0:09:17 lr: 0.000065 grad: 0.1742 (0.1894) loss: 0.6831 (0.6831) time: 0.1421 data: 0.0572 max mem: 9377 +Train: [51] [2900/6250] eta: 0:09:01 lr: 0.000065 grad: 0.1717 (0.1889) loss: 0.6923 (0.6830) time: 0.1565 data: 0.0760 max mem: 9377 +Train: [51] [3000/6250] eta: 0:08:46 lr: 0.000065 grad: 0.1633 (0.1883) loss: 0.6893 (0.6831) time: 0.2088 data: 0.1280 max mem: 9377 +Train: [51] [3100/6250] eta: 0:08:28 lr: 0.000065 grad: 0.1712 (0.1878) loss: 0.6814 (0.6831) time: 0.1430 data: 0.0560 max mem: 9377 +Train: [51] [3200/6250] eta: 0:08:12 lr: 0.000065 grad: 0.1757 (0.1874) loss: 0.6754 (0.6830) time: 0.1790 data: 0.0882 max mem: 9377 +Train: [51] [3300/6250] eta: 0:07:56 lr: 0.000065 grad: 0.1708 (0.1870) loss: 0.6901 (0.6830) time: 0.1782 data: 0.0864 max mem: 9377 +Train: [51] [3400/6250] eta: 0:07:41 lr: 0.000064 grad: 0.1754 (0.1866) loss: 0.6689 (0.6830) time: 0.1723 data: 0.0851 max mem: 9377 +Train: [51] [3500/6250] eta: 0:07:26 lr: 0.000064 grad: 0.1698 (0.1862) loss: 0.6764 (0.6829) time: 0.2001 data: 0.1129 max mem: 9377 +Train: [51] [3600/6250] eta: 0:07:11 lr: 0.000064 grad: 0.1747 (0.1858) loss: 0.6776 (0.6827) time: 0.1926 data: 0.1059 max mem: 9377 +Train: [51] [3700/6250] eta: 0:06:56 lr: 0.000064 grad: 0.1740 (0.1856) loss: 0.6636 (0.6826) time: 0.1955 data: 0.1066 max mem: 9377 +Train: [51] [3800/6250] eta: 0:06:41 lr: 0.000064 grad: 0.1665 (0.1852) loss: 0.6751 (0.6824) time: 0.1750 data: 0.0812 max mem: 9377 +Train: [51] [3900/6250] eta: 0:06:26 lr: 0.000064 grad: 0.1695 (0.1850) loss: 0.6700 (0.6822) time: 0.1398 data: 0.0389 max mem: 9377 +Train: [51] [4000/6250] eta: 0:06:11 lr: 0.000064 grad: 0.1727 (0.1847) loss: 0.6697 (0.6820) time: 0.1723 data: 0.0845 max mem: 9377 +Train: [51] [4100/6250] eta: 0:05:55 lr: 0.000064 grad: 0.1709 (0.1844) loss: 0.6858 (0.6819) time: 0.1528 data: 0.0651 max mem: 9377 +Train: [51] [4200/6250] eta: 0:05:38 lr: 0.000064 grad: 0.1661 (0.1841) loss: 0.6924 (0.6818) time: 0.1645 data: 0.0765 max mem: 9377 +Train: [51] [4300/6250] eta: 0:05:22 lr: 0.000064 grad: 0.1688 (0.1838) loss: 0.6794 (0.6817) time: 0.1840 data: 0.0947 max mem: 9377 +Train: [51] [4400/6250] eta: 0:05:05 lr: 0.000064 grad: 0.1659 (0.1835) loss: 0.6769 (0.6817) time: 0.1386 data: 0.0558 max mem: 9377 +Train: [51] [4500/6250] eta: 0:04:49 lr: 0.000064 grad: 0.1707 (0.1832) loss: 0.6944 (0.6817) time: 0.1969 data: 0.1125 max mem: 9377 +Train: [51] [4600/6250] eta: 0:04:32 lr: 0.000064 grad: 0.1705 (0.1830) loss: 0.6759 (0.6816) time: 0.2444 data: 0.1603 max mem: 9377 +Train: [51] [4700/6250] eta: 0:04:15 lr: 0.000064 grad: 0.1705 (0.1828) loss: 0.6810 (0.6816) time: 0.1641 data: 0.0749 max mem: 9377 +Train: [51] [4800/6250] eta: 0:03:59 lr: 0.000064 grad: 0.1743 (0.1825) loss: 0.6724 (0.6816) time: 0.1232 data: 0.0403 max mem: 9377 +Train: [51] [4900/6250] eta: 0:03:42 lr: 0.000064 grad: 0.1712 (0.1824) loss: 0.6828 (0.6816) time: 0.1668 data: 0.0764 max mem: 9377 +Train: [51] [5000/6250] eta: 0:03:25 lr: 0.000064 grad: 0.1696 (0.1823) loss: 0.6897 (0.6816) time: 0.1514 data: 0.0588 max mem: 9377 +Train: [51] [5100/6250] eta: 0:03:09 lr: 0.000064 grad: 0.1712 (0.1821) loss: 0.6950 (0.6816) time: 0.1596 data: 0.0718 max mem: 9377 +Train: [51] [5200/6250] eta: 0:02:52 lr: 0.000064 grad: 0.1771 (0.1820) loss: 0.6705 (0.6815) time: 0.1631 data: 0.0759 max mem: 9377 +Train: [51] [5300/6250] eta: 0:02:36 lr: 0.000064 grad: 0.1702 (0.1819) loss: 0.6850 (0.6815) time: 0.1713 data: 0.0866 max mem: 9377 +Train: [51] [5400/6250] eta: 0:02:19 lr: 0.000064 grad: 0.1731 (0.1817) loss: 0.6732 (0.6815) time: 0.1672 data: 0.0685 max mem: 9377 +Train: [51] [5500/6250] eta: 0:02:02 lr: 0.000064 grad: 0.1704 (0.1815) loss: 0.6833 (0.6815) time: 0.1523 data: 0.0631 max mem: 9377 +Train: [51] [5600/6250] eta: 0:01:46 lr: 0.000064 grad: 0.1800 (0.1815) loss: 0.6777 (0.6815) time: 0.1522 data: 0.0633 max mem: 9377 +Train: [51] [5700/6250] eta: 0:01:30 lr: 0.000064 grad: 0.1703 (0.1814) loss: 0.6896 (0.6814) time: 0.1637 data: 0.0766 max mem: 9377 +Train: [51] [5800/6250] eta: 0:01:13 lr: 0.000064 grad: 0.1698 (0.1813) loss: 0.6800 (0.6814) time: 0.1517 data: 0.0595 max mem: 9377 +Train: [51] [5900/6250] eta: 0:00:57 lr: 0.000064 grad: 0.1693 (0.1811) loss: 0.6745 (0.6814) time: 0.1558 data: 0.0609 max mem: 9377 +Train: [51] [6000/6250] eta: 0:00:40 lr: 0.000064 grad: 0.1679 (0.1810) loss: 0.6845 (0.6815) time: 0.1562 data: 0.0742 max mem: 9377 +Train: [51] [6100/6250] eta: 0:00:24 lr: 0.000064 grad: 0.1660 (0.1808) loss: 0.6947 (0.6816) time: 0.1549 data: 0.0649 max mem: 9377 +Train: [51] [6200/6250] eta: 0:00:08 lr: 0.000064 grad: 0.1775 (0.1807) loss: 0.6732 (0.6816) time: 0.1276 data: 0.0310 max mem: 9377 +Train: [51] [6249/6250] eta: 0:00:00 lr: 0.000064 grad: 0.1710 (0.1807) loss: 0.6901 (0.6816) time: 0.1814 data: 0.0925 max mem: 9377 +Train: [51] Total time: 0:17:08 (0.1645 s / it) +Averaged stats: lr: 0.000064 grad: 0.1710 (0.1807) loss: 0.6901 (0.6816) +Eval (hcp-train-subset): [51] [ 0/62] eta: 0:05:13 loss: 0.8816 (0.8816) time: 5.0600 data: 5.0282 max mem: 9377 +Eval (hcp-train-subset): [51] [61/62] eta: 0:00:00 loss: 0.8953 (0.8946) time: 0.1561 data: 0.1307 max mem: 9377 +Eval (hcp-train-subset): [51] Total time: 0:00:15 (0.2432 s / it) +Averaged stats (hcp-train-subset): loss: 0.8953 (0.8946) +Eval (hcp-val): [51] [ 0/62] eta: 0:06:55 loss: 0.8949 (0.8949) time: 6.6955 data: 6.6286 max mem: 9377 +Eval (hcp-val): [51] [61/62] eta: 0:00:00 loss: 0.8923 (0.8937) time: 0.1385 data: 0.1134 max mem: 9377 +Eval (hcp-val): [51] Total time: 0:00:15 (0.2554 s / it) +Averaged stats (hcp-val): loss: 0.8923 (0.8937) +Eval (nsd-val): [51] [ 0/62] eta: 0:05:50 loss: 0.8733 (0.8733) time: 5.6532 data: 5.6198 max mem: 9377 +Eval (nsd-val): [51] [61/62] eta: 0:00:00 loss: 0.8841 (0.8854) time: 0.1383 data: 0.1112 max mem: 9377 +Eval (nsd-val): [51] Total time: 0:00:15 (0.2481 s / it) +Averaged stats (nsd-val): loss: 0.8841 (0.8854) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [52] [ 0/6250] eta: 10:24:41 lr: 0.000064 grad: 0.3177 (0.3177) loss: 0.5881 (0.5881) time: 5.9970 data: 5.8167 max mem: 9377 +Train: [52] [ 100/6250] eta: 0:22:52 lr: 0.000063 grad: 0.2668 (0.3556) loss: 0.7180 (0.7043) time: 0.1829 data: 0.0805 max mem: 9377 +Train: [52] [ 200/6250] eta: 0:20:08 lr: 0.000063 grad: 0.3242 (0.3458) loss: 0.6776 (0.6994) time: 0.1753 data: 0.0769 max mem: 9377 +Train: [52] [ 300/6250] eta: 0:18:33 lr: 0.000063 grad: 0.2560 (0.3330) loss: 0.6789 (0.6941) time: 0.1565 data: 0.0496 max mem: 9377 +Train: [52] [ 400/6250] eta: 0:17:37 lr: 0.000063 grad: 0.2378 (0.3108) loss: 0.6910 (0.6938) time: 0.1458 data: 0.0397 max mem: 9377 +Train: [52] [ 500/6250] eta: 0:16:50 lr: 0.000063 grad: 0.2467 (0.3013) loss: 0.6856 (0.6928) time: 0.1429 data: 0.0479 max mem: 9377 +Train: [52] [ 600/6250] eta: 0:16:13 lr: 0.000063 grad: 0.2093 (0.2923) loss: 0.6817 (0.6930) time: 0.1621 data: 0.0589 max mem: 9377 +Train: [52] [ 700/6250] eta: 0:15:56 lr: 0.000063 grad: 0.2061 (0.2810) loss: 0.6904 (0.6929) time: 0.1796 data: 0.0900 max mem: 9377 +Train: [52] [ 800/6250] eta: 0:15:33 lr: 0.000063 grad: 0.1840 (0.2694) loss: 0.6775 (0.6922) time: 0.1696 data: 0.0728 max mem: 9377 +Train: [52] [ 900/6250] eta: 0:15:12 lr: 0.000063 grad: 0.1718 (0.2596) loss: 0.6888 (0.6916) time: 0.1724 data: 0.0742 max mem: 9377 +Train: [52] [1000/6250] eta: 0:14:46 lr: 0.000063 grad: 0.1787 (0.2520) loss: 0.6887 (0.6906) time: 0.1698 data: 0.0763 max mem: 9377 +Train: [52] [1100/6250] eta: 0:14:26 lr: 0.000063 grad: 0.1754 (0.2456) loss: 0.6845 (0.6902) time: 0.1565 data: 0.0631 max mem: 9377 +Train: [52] [1200/6250] eta: 0:14:04 lr: 0.000063 grad: 0.1783 (0.2399) loss: 0.6771 (0.6892) time: 0.1531 data: 0.0664 max mem: 9377 +Train: [52] [1300/6250] eta: 0:13:42 lr: 0.000063 grad: 0.1719 (0.2352) loss: 0.6666 (0.6882) time: 0.1575 data: 0.0609 max mem: 9377 +Train: [52] [1400/6250] eta: 0:13:19 lr: 0.000063 grad: 0.1780 (0.2309) loss: 0.6793 (0.6874) time: 0.1468 data: 0.0572 max mem: 9377 +Train: [52] [1500/6250] eta: 0:12:58 lr: 0.000063 grad: 0.1745 (0.2271) loss: 0.6762 (0.6870) time: 0.1312 data: 0.0422 max mem: 9377 +Train: [52] [1600/6250] eta: 0:12:37 lr: 0.000063 grad: 0.1748 (0.2236) loss: 0.6712 (0.6864) time: 0.1435 data: 0.0457 max mem: 9377 +Train: [52] [1700/6250] eta: 0:12:17 lr: 0.000063 grad: 0.1680 (0.2205) loss: 0.6819 (0.6857) time: 0.1096 data: 0.0213 max mem: 9377 +Train: [52] [1800/6250] eta: 0:12:00 lr: 0.000063 grad: 0.1669 (0.2179) loss: 0.6678 (0.6851) time: 0.1631 data: 0.0851 max mem: 9377 +Train: [52] [1900/6250] eta: 0:11:44 lr: 0.000063 grad: 0.1727 (0.2155) loss: 0.6623 (0.6847) time: 0.2004 data: 0.0984 max mem: 9377 +Train: [52] [2000/6250] eta: 0:11:28 lr: 0.000063 grad: 0.1642 (0.2132) loss: 0.6995 (0.6847) time: 0.1900 data: 0.1042 max mem: 9377 +Train: [52] [2100/6250] eta: 0:11:11 lr: 0.000063 grad: 0.1728 (0.2112) loss: 0.6743 (0.6846) time: 0.1661 data: 0.0810 max mem: 9377 +Train: [52] [2200/6250] eta: 0:10:55 lr: 0.000063 grad: 0.1634 (0.2094) loss: 0.6866 (0.6846) time: 0.1458 data: 0.0629 max mem: 9377 +Train: [52] [2300/6250] eta: 0:10:40 lr: 0.000063 grad: 0.1677 (0.2077) loss: 0.6813 (0.6847) time: 0.1705 data: 0.0861 max mem: 9377 +Train: [52] [2400/6250] eta: 0:10:22 lr: 0.000063 grad: 0.1738 (0.2063) loss: 0.6789 (0.6849) time: 0.1385 data: 0.0446 max mem: 9377 +Train: [52] [2500/6250] eta: 0:10:08 lr: 0.000063 grad: 0.1662 (0.2048) loss: 0.6793 (0.6851) time: 0.1755 data: 0.0732 max mem: 9377 +Train: [52] [2600/6250] eta: 0:09:52 lr: 0.000063 grad: 0.1639 (0.2034) loss: 0.6873 (0.6851) time: 0.1682 data: 0.0677 max mem: 9377 +Train: [52] [2700/6250] eta: 0:09:36 lr: 0.000063 grad: 0.1677 (0.2021) loss: 0.6936 (0.6853) time: 0.1679 data: 0.0731 max mem: 9377 +Train: [52] [2800/6250] eta: 0:09:19 lr: 0.000063 grad: 0.1647 (0.2010) loss: 0.6844 (0.6853) time: 0.1672 data: 0.0797 max mem: 9377 +Train: [52] [2900/6250] eta: 0:09:02 lr: 0.000063 grad: 0.1723 (0.2000) loss: 0.6854 (0.6853) time: 0.1774 data: 0.0936 max mem: 9377 +Train: [52] [3000/6250] eta: 0:08:45 lr: 0.000063 grad: 0.1778 (0.1992) loss: 0.6835 (0.6853) time: 0.1421 data: 0.0496 max mem: 9377 +Train: [52] [3100/6250] eta: 0:08:28 lr: 0.000063 grad: 0.1739 (0.1983) loss: 0.6871 (0.6854) time: 0.1603 data: 0.0726 max mem: 9377 +Train: [52] [3200/6250] eta: 0:08:11 lr: 0.000062 grad: 0.1705 (0.1974) loss: 0.6989 (0.6853) time: 0.1479 data: 0.0577 max mem: 9377 +Train: [52] [3300/6250] eta: 0:07:55 lr: 0.000062 grad: 0.1729 (0.1967) loss: 0.6804 (0.6854) time: 0.1251 data: 0.0320 max mem: 9377 +Train: [52] [3400/6250] eta: 0:07:39 lr: 0.000062 grad: 0.1722 (0.1960) loss: 0.6797 (0.6853) time: 0.1542 data: 0.0666 max mem: 9377 +Train: [52] [3500/6250] eta: 0:07:23 lr: 0.000062 grad: 0.1704 (0.1953) loss: 0.6932 (0.6853) time: 0.1752 data: 0.0953 max mem: 9377 +Train: [52] [3600/6250] eta: 0:07:07 lr: 0.000062 grad: 0.1738 (0.1948) loss: 0.6903 (0.6852) time: 0.1582 data: 0.0728 max mem: 9377 +Train: [52] [3700/6250] eta: 0:06:51 lr: 0.000062 grad: 0.1713 (0.1941) loss: 0.6831 (0.6853) time: 0.1791 data: 0.0953 max mem: 9377 +Train: [52] [3800/6250] eta: 0:06:35 lr: 0.000062 grad: 0.1682 (0.1935) loss: 0.6866 (0.6855) time: 0.1533 data: 0.0624 max mem: 9377 +Train: [52] [3900/6250] eta: 0:06:20 lr: 0.000062 grad: 0.1636 (0.1929) loss: 0.6840 (0.6856) time: 0.1917 data: 0.1019 max mem: 9377 +Train: [52] [4000/6250] eta: 0:06:04 lr: 0.000062 grad: 0.1698 (0.1923) loss: 0.6917 (0.6858) time: 0.1482 data: 0.0563 max mem: 9377 +Train: [52] [4100/6250] eta: 0:05:47 lr: 0.000062 grad: 0.1679 (0.1918) loss: 0.6937 (0.6860) time: 0.1600 data: 0.0697 max mem: 9377 +Train: [52] [4200/6250] eta: 0:05:31 lr: 0.000062 grad: 0.1694 (0.1913) loss: 0.6917 (0.6862) time: 0.1814 data: 0.0943 max mem: 9377 +Train: [52] [4300/6250] eta: 0:05:15 lr: 0.000062 grad: 0.1679 (0.1908) loss: 0.6836 (0.6864) time: 0.1590 data: 0.0677 max mem: 9377 +Train: [52] [4400/6250] eta: 0:04:58 lr: 0.000062 grad: 0.1724 (0.1904) loss: 0.6840 (0.6866) time: 0.1410 data: 0.0583 max mem: 9377 +Train: [52] [4500/6250] eta: 0:04:42 lr: 0.000062 grad: 0.1658 (0.1899) loss: 0.7000 (0.6869) time: 0.1566 data: 0.0664 max mem: 9377 +Train: [52] [4600/6250] eta: 0:04:26 lr: 0.000062 grad: 0.1683 (0.1895) loss: 0.6860 (0.6870) time: 0.1841 data: 0.0955 max mem: 9377 +Train: [52] [4700/6250] eta: 0:04:09 lr: 0.000062 grad: 0.1662 (0.1891) loss: 0.6901 (0.6872) time: 0.1407 data: 0.0396 max mem: 9377 +Train: [52] [4800/6250] eta: 0:03:53 lr: 0.000062 grad: 0.1683 (0.1887) loss: 0.6801 (0.6873) time: 0.1498 data: 0.0657 max mem: 9377 +Train: [52] [4900/6250] eta: 0:03:37 lr: 0.000062 grad: 0.1688 (0.1884) loss: 0.6816 (0.6874) time: 0.1669 data: 0.0882 max mem: 9377 +Train: [52] [5000/6250] eta: 0:03:20 lr: 0.000062 grad: 0.1776 (0.1881) loss: 0.6714 (0.6874) time: 0.1462 data: 0.0594 max mem: 9377 +Train: [52] [5100/6250] eta: 0:03:04 lr: 0.000062 grad: 0.1697 (0.1878) loss: 0.6831 (0.6874) time: 0.1639 data: 0.0801 max mem: 9377 +Train: [52] [5200/6250] eta: 0:02:48 lr: 0.000062 grad: 0.1710 (0.1875) loss: 0.6911 (0.6874) time: 0.1743 data: 0.0900 max mem: 9377 +Train: [52] [5300/6250] eta: 0:02:32 lr: 0.000062 grad: 0.1705 (0.1873) loss: 0.6916 (0.6874) time: 0.1620 data: 0.0704 max mem: 9377 +Train: [52] [5400/6250] eta: 0:02:16 lr: 0.000062 grad: 0.1781 (0.1871) loss: 0.6719 (0.6872) time: 0.1230 data: 0.0300 max mem: 9377 +Train: [52] [5500/6250] eta: 0:02:00 lr: 0.000062 grad: 0.1768 (0.1868) loss: 0.6785 (0.6871) time: 0.1321 data: 0.0360 max mem: 9377 +Train: [52] [5600/6250] eta: 0:01:44 lr: 0.000062 grad: 0.1722 (0.1867) loss: 0.6777 (0.6870) time: 0.1475 data: 0.0544 max mem: 9377 +Train: [52] [5700/6250] eta: 0:01:28 lr: 0.000062 grad: 0.1716 (0.1865) loss: 0.6851 (0.6869) time: 0.1712 data: 0.0767 max mem: 9377 +Train: [52] [5800/6250] eta: 0:01:12 lr: 0.000062 grad: 0.1778 (0.1862) loss: 0.6807 (0.6869) time: 0.1631 data: 0.0826 max mem: 9377 +Train: [52] [5900/6250] eta: 0:00:56 lr: 0.000062 grad: 0.1686 (0.1860) loss: 0.6972 (0.6869) time: 0.1447 data: 0.0607 max mem: 9377 +Train: [52] [6000/6250] eta: 0:00:40 lr: 0.000062 grad: 0.1711 (0.1858) loss: 0.6976 (0.6870) time: 0.1689 data: 0.0802 max mem: 9377 +Train: [52] [6100/6250] eta: 0:00:23 lr: 0.000062 grad: 0.1731 (0.1856) loss: 0.6786 (0.6870) time: 0.1556 data: 0.0713 max mem: 9377 +Train: [52] [6200/6250] eta: 0:00:07 lr: 0.000061 grad: 0.1715 (0.1854) loss: 0.6922 (0.6870) time: 0.1646 data: 0.0846 max mem: 9377 +Train: [52] [6249/6250] eta: 0:00:00 lr: 0.000061 grad: 0.1680 (0.1853) loss: 0.6979 (0.6870) time: 0.1422 data: 0.0485 max mem: 9377 +Train: [52] Total time: 0:16:44 (0.1607 s / it) +Averaged stats: lr: 0.000061 grad: 0.1680 (0.1853) loss: 0.6979 (0.6870) +Eval (hcp-train-subset): [52] [ 0/62] eta: 0:04:14 loss: 0.8845 (0.8845) time: 4.1073 data: 3.9857 max mem: 9377 +Eval (hcp-train-subset): [52] [61/62] eta: 0:00:00 loss: 0.8921 (0.8958) time: 0.1529 data: 0.1276 max mem: 9377 +Eval (hcp-train-subset): [52] Total time: 0:00:14 (0.2403 s / it) +Averaged stats (hcp-train-subset): loss: 0.8921 (0.8958) +Eval (hcp-val): [52] [ 0/62] eta: 0:05:13 loss: 0.8992 (0.8992) time: 5.0488 data: 5.0159 max mem: 9377 +Eval (hcp-val): [52] [61/62] eta: 0:00:00 loss: 0.8946 (0.8948) time: 0.1447 data: 0.1175 max mem: 9377 +Eval (hcp-val): [52] Total time: 0:00:14 (0.2398 s / it) +Averaged stats (hcp-val): loss: 0.8946 (0.8948) +Eval (nsd-val): [52] [ 0/62] eta: 0:05:59 loss: 0.8712 (0.8712) time: 5.7971 data: 5.7653 max mem: 9377 +Eval (nsd-val): [52] [61/62] eta: 0:00:00 loss: 0.8843 (0.8856) time: 0.1344 data: 0.1095 max mem: 9377 +Eval (nsd-val): [52] Total time: 0:00:14 (0.2323 s / it) +Averaged stats (nsd-val): loss: 0.8843 (0.8856) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [53] [ 0/6250] eta: 8:51:16 lr: 0.000061 grad: 0.3546 (0.3546) loss: 0.6561 (0.6561) time: 5.1003 data: 4.7821 max mem: 9377 +Train: [53] [ 100/6250] eta: 0:21:47 lr: 0.000061 grad: 0.2750 (0.3501) loss: 0.6868 (0.7179) time: 0.1520 data: 0.0493 max mem: 9377 +Train: [53] [ 200/6250] eta: 0:19:35 lr: 0.000061 grad: 0.3191 (0.3658) loss: 0.7113 (0.7094) time: 0.1783 data: 0.0812 max mem: 9377 +Train: [53] [ 300/6250] eta: 0:18:05 lr: 0.000061 grad: 0.2114 (0.3253) loss: 0.6896 (0.7051) time: 0.1566 data: 0.0623 max mem: 9377 +Train: [53] [ 400/6250] eta: 0:17:29 lr: 0.000061 grad: 0.2115 (0.3018) loss: 0.6948 (0.7021) time: 0.1718 data: 0.0774 max mem: 9377 +Train: [53] [ 500/6250] eta: 0:16:49 lr: 0.000061 grad: 0.1789 (0.2823) loss: 0.6872 (0.7015) time: 0.1436 data: 0.0445 max mem: 9377 +Train: [53] [ 600/6250] eta: 0:16:13 lr: 0.000061 grad: 0.1802 (0.2659) loss: 0.7015 (0.7001) time: 0.1386 data: 0.0374 max mem: 9377 +Train: [53] [ 700/6250] eta: 0:15:58 lr: 0.000061 grad: 0.1791 (0.2539) loss: 0.6752 (0.6983) time: 0.1559 data: 0.0691 max mem: 9377 +Train: [53] [ 800/6250] eta: 0:15:38 lr: 0.000061 grad: 0.1762 (0.2450) loss: 0.6903 (0.6972) time: 0.1614 data: 0.0731 max mem: 9377 +Train: [53] [ 900/6250] eta: 0:15:22 lr: 0.000061 grad: 0.1719 (0.2378) loss: 0.6883 (0.6955) time: 0.1483 data: 0.0614 max mem: 9377 +Train: [53] [1000/6250] eta: 0:14:58 lr: 0.000061 grad: 0.1875 (0.2323) loss: 0.6877 (0.6944) time: 0.1394 data: 0.0455 max mem: 9377 +Train: [53] [1100/6250] eta: 0:14:36 lr: 0.000061 grad: 0.1736 (0.2279) loss: 0.6769 (0.6930) time: 0.1458 data: 0.0568 max mem: 9377 +Train: [53] [1200/6250] eta: 0:14:13 lr: 0.000061 grad: 0.1735 (0.2234) loss: 0.6797 (0.6924) time: 0.1383 data: 0.0536 max mem: 9377 +Train: [53] [1300/6250] eta: 0:13:51 lr: 0.000061 grad: 0.1710 (0.2199) loss: 0.6941 (0.6918) time: 0.1642 data: 0.0753 max mem: 9377 +Train: [53] [1400/6250] eta: 0:13:27 lr: 0.000061 grad: 0.1740 (0.2169) loss: 0.6912 (0.6910) time: 0.1537 data: 0.0570 max mem: 9377 +Train: [53] [1500/6250] eta: 0:13:05 lr: 0.000061 grad: 0.1778 (0.2140) loss: 0.6780 (0.6905) time: 0.1597 data: 0.0663 max mem: 9377 +Train: [53] [1600/6250] eta: 0:12:44 lr: 0.000061 grad: 0.1804 (0.2116) loss: 0.6806 (0.6899) time: 0.1640 data: 0.0758 max mem: 9377 +Train: [53] [1700/6250] eta: 0:12:25 lr: 0.000061 grad: 0.1695 (0.2095) loss: 0.6892 (0.6896) time: 0.1539 data: 0.0657 max mem: 9377 +Train: [53] [1800/6250] eta: 0:12:09 lr: 0.000061 grad: 0.1729 (0.2077) loss: 0.6946 (0.6894) time: 0.1478 data: 0.0739 max mem: 9377 +Train: [53] [1900/6250] eta: 0:11:49 lr: 0.000061 grad: 0.1758 (0.2062) loss: 0.6705 (0.6889) time: 0.1431 data: 0.0584 max mem: 9377 +Train: [53] [2000/6250] eta: 0:11:38 lr: 0.000061 grad: 0.1648 (0.2045) loss: 0.6979 (0.6890) time: 0.1843 data: 0.1036 max mem: 9377 +Train: [53] [2100/6250] eta: 0:11:25 lr: 0.000061 grad: 0.1705 (0.2029) loss: 0.6874 (0.6890) time: 0.1848 data: 0.0937 max mem: 9377 +Train: [53] [2200/6250] eta: 0:11:10 lr: 0.000061 grad: 0.1728 (0.2015) loss: 0.6903 (0.6889) time: 0.1676 data: 0.0881 max mem: 9377 +Train: [53] [2300/6250] eta: 0:10:55 lr: 0.000061 grad: 0.1764 (0.2002) loss: 0.7042 (0.6890) time: 0.1797 data: 0.0905 max mem: 9377 +Train: [53] [2400/6250] eta: 0:10:41 lr: 0.000061 grad: 0.1763 (0.1993) loss: 0.6835 (0.6891) time: 0.1863 data: 0.0892 max mem: 9377 +Train: [53] [2500/6250] eta: 0:10:25 lr: 0.000061 grad: 0.1644 (0.1981) loss: 0.6739 (0.6891) time: 0.1889 data: 0.0932 max mem: 9377 +Train: [53] [2600/6250] eta: 0:10:09 lr: 0.000061 grad: 0.1712 (0.1972) loss: 0.6920 (0.6888) time: 0.1572 data: 0.0648 max mem: 9377 +Train: [53] [2700/6250] eta: 0:09:53 lr: 0.000061 grad: 0.1727 (0.1963) loss: 0.6893 (0.6887) time: 0.1700 data: 0.0725 max mem: 9377 +Train: [53] [2800/6250] eta: 0:09:36 lr: 0.000061 grad: 0.1690 (0.1955) loss: 0.6849 (0.6885) time: 0.1471 data: 0.0496 max mem: 9377 +Train: [53] [2900/6250] eta: 0:09:18 lr: 0.000061 grad: 0.1702 (0.1947) loss: 0.6875 (0.6885) time: 0.1677 data: 0.0736 max mem: 9377 +Train: [53] [3000/6250] eta: 0:09:00 lr: 0.000060 grad: 0.1661 (0.1938) loss: 0.6958 (0.6885) time: 0.1472 data: 0.0536 max mem: 9377 +Train: [53] [3100/6250] eta: 0:08:42 lr: 0.000060 grad: 0.1703 (0.1932) loss: 0.6852 (0.6885) time: 0.1641 data: 0.0760 max mem: 9377 +Train: [53] [3200/6250] eta: 0:08:25 lr: 0.000060 grad: 0.1703 (0.1926) loss: 0.6801 (0.6884) time: 0.1403 data: 0.0501 max mem: 9377 +Train: [53] [3300/6250] eta: 0:08:08 lr: 0.000060 grad: 0.1712 (0.1921) loss: 0.6702 (0.6881) time: 0.1531 data: 0.0672 max mem: 9377 +Train: [53] [3400/6250] eta: 0:07:52 lr: 0.000060 grad: 0.1723 (0.1917) loss: 0.6745 (0.6878) time: 0.1782 data: 0.0965 max mem: 9377 +Train: [53] [3500/6250] eta: 0:07:35 lr: 0.000060 grad: 0.1742 (0.1913) loss: 0.6832 (0.6876) time: 0.1692 data: 0.0839 max mem: 9377 +Train: [53] [3600/6250] eta: 0:07:19 lr: 0.000060 grad: 0.1767 (0.1909) loss: 0.6850 (0.6875) time: 0.1830 data: 0.1030 max mem: 9377 +Train: [53] [3700/6250] eta: 0:07:03 lr: 0.000060 grad: 0.1705 (0.1905) loss: 0.6630 (0.6873) time: 0.1905 data: 0.1032 max mem: 9377 +Train: [53] [3800/6250] eta: 0:06:46 lr: 0.000060 grad: 0.1718 (0.1901) loss: 0.6847 (0.6870) time: 0.1502 data: 0.0607 max mem: 9377 +Train: [53] [3900/6250] eta: 0:06:30 lr: 0.000060 grad: 0.1723 (0.1897) loss: 0.6764 (0.6866) time: 0.1506 data: 0.0454 max mem: 9377 +Train: [53] [4000/6250] eta: 0:06:14 lr: 0.000060 grad: 0.1676 (0.1894) loss: 0.6666 (0.6864) time: 0.1757 data: 0.0901 max mem: 9377 +Train: [53] [4100/6250] eta: 0:05:57 lr: 0.000060 grad: 0.1666 (0.1890) loss: 0.6933 (0.6862) time: 0.1524 data: 0.0447 max mem: 9377 +Train: [53] [4200/6250] eta: 0:05:40 lr: 0.000060 grad: 0.1708 (0.1888) loss: 0.6722 (0.6861) time: 0.1613 data: 0.0672 max mem: 9377 +Train: [53] [4300/6250] eta: 0:05:24 lr: 0.000060 grad: 0.1724 (0.1885) loss: 0.6868 (0.6859) time: 0.1374 data: 0.0502 max mem: 9377 +Train: [53] [4400/6250] eta: 0:05:06 lr: 0.000060 grad: 0.1762 (0.1882) loss: 0.6747 (0.6859) time: 0.1579 data: 0.0550 max mem: 9377 +Train: [53] [4500/6250] eta: 0:04:49 lr: 0.000060 grad: 0.1754 (0.1879) loss: 0.6700 (0.6857) time: 0.1539 data: 0.0575 max mem: 9377 +Train: [53] [4600/6250] eta: 0:04:32 lr: 0.000060 grad: 0.1733 (0.1876) loss: 0.6817 (0.6856) time: 0.1730 data: 0.0863 max mem: 9377 +Train: [53] [4700/6250] eta: 0:04:15 lr: 0.000060 grad: 0.1633 (0.1874) loss: 0.6842 (0.6855) time: 0.1840 data: 0.0957 max mem: 9377 +Train: [53] [4800/6250] eta: 0:03:58 lr: 0.000060 grad: 0.1729 (0.1871) loss: 0.6890 (0.6854) time: 0.1628 data: 0.0814 max mem: 9377 +Train: [53] [4900/6250] eta: 0:03:42 lr: 0.000060 grad: 0.1746 (0.1868) loss: 0.6764 (0.6854) time: 0.1420 data: 0.0497 max mem: 9377 +Train: [53] [5000/6250] eta: 0:03:25 lr: 0.000060 grad: 0.1704 (0.1866) loss: 0.6768 (0.6852) time: 0.1432 data: 0.0566 max mem: 9377 +Train: [53] [5100/6250] eta: 0:03:08 lr: 0.000060 grad: 0.1679 (0.1863) loss: 0.6694 (0.6851) time: 0.1354 data: 0.0377 max mem: 9377 +Train: [53] [5200/6250] eta: 0:02:52 lr: 0.000060 grad: 0.1708 (0.1861) loss: 0.6733 (0.6849) time: 0.1317 data: 0.0395 max mem: 9377 +Train: [53] [5300/6250] eta: 0:02:35 lr: 0.000060 grad: 0.1754 (0.1859) loss: 0.6756 (0.6848) time: 0.1658 data: 0.0788 max mem: 9377 +Train: [53] [5400/6250] eta: 0:02:19 lr: 0.000060 grad: 0.1800 (0.1857) loss: 0.6474 (0.6846) time: 0.1338 data: 0.0361 max mem: 9377 +Train: [53] [5500/6250] eta: 0:02:02 lr: 0.000060 grad: 0.1748 (0.1855) loss: 0.6814 (0.6844) time: 0.1900 data: 0.1034 max mem: 9377 +Train: [53] [5600/6250] eta: 0:01:46 lr: 0.000060 grad: 0.1735 (0.1853) loss: 0.6806 (0.6844) time: 0.1452 data: 0.0628 max mem: 9377 +Train: [53] [5700/6250] eta: 0:01:29 lr: 0.000060 grad: 0.1725 (0.1851) loss: 0.6912 (0.6844) time: 0.1680 data: 0.0842 max mem: 9377 +Train: [53] [5800/6250] eta: 0:01:13 lr: 0.000060 grad: 0.1768 (0.1848) loss: 0.6788 (0.6844) time: 0.1456 data: 0.0648 max mem: 9377 +Train: [53] [5900/6250] eta: 0:00:57 lr: 0.000060 grad: 0.1741 (0.1847) loss: 0.6699 (0.6844) time: 0.1577 data: 0.0670 max mem: 9377 +Train: [53] [6000/6250] eta: 0:00:40 lr: 0.000059 grad: 0.1687 (0.1845) loss: 0.6804 (0.6843) time: 0.1201 data: 0.0365 max mem: 9377 +Train: [53] [6100/6250] eta: 0:00:24 lr: 0.000059 grad: 0.1752 (0.1845) loss: 0.6820 (0.6841) time: 0.1639 data: 0.0750 max mem: 9377 +Train: [53] [6200/6250] eta: 0:00:08 lr: 0.000059 grad: 0.1760 (0.1843) loss: 0.6701 (0.6840) time: 0.1511 data: 0.0636 max mem: 9377 +Train: [53] [6249/6250] eta: 0:00:00 lr: 0.000059 grad: 0.1719 (0.1843) loss: 0.6688 (0.6839) time: 0.1449 data: 0.0585 max mem: 9377 +Train: [53] Total time: 0:17:04 (0.1639 s / it) +Averaged stats: lr: 0.000059 grad: 0.1719 (0.1843) loss: 0.6688 (0.6839) +Eval (hcp-train-subset): [53] [ 0/62] eta: 0:04:08 loss: 0.8877 (0.8877) time: 4.0158 data: 3.9384 max mem: 9377 +Eval (hcp-train-subset): [53] [61/62] eta: 0:00:00 loss: 0.8935 (0.8968) time: 0.1403 data: 0.1151 max mem: 9377 +Eval (hcp-train-subset): [53] Total time: 0:00:15 (0.2420 s / it) +Averaged stats (hcp-train-subset): loss: 0.8935 (0.8968) +Eval (hcp-val): [53] [ 0/62] eta: 0:05:49 loss: 0.8961 (0.8961) time: 5.6407 data: 5.6104 max mem: 9377 +Eval (hcp-val): [53] [61/62] eta: 0:00:00 loss: 0.8954 (0.8969) time: 0.1370 data: 0.1105 max mem: 9377 +Eval (hcp-val): [53] Total time: 0:00:14 (0.2355 s / it) +Averaged stats (hcp-val): loss: 0.8954 (0.8969) +Eval (nsd-val): [53] [ 0/62] eta: 0:05:05 loss: 0.8822 (0.8822) time: 4.9273 data: 4.8959 max mem: 9377 +Eval (nsd-val): [53] [61/62] eta: 0:00:00 loss: 0.8850 (0.8879) time: 0.1459 data: 0.1202 max mem: 9377 +Eval (nsd-val): [53] Total time: 0:00:14 (0.2269 s / it) +Averaged stats (nsd-val): loss: 0.8850 (0.8879) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [54] [ 0/6250] eta: 10:40:39 lr: 0.000059 grad: 0.2703 (0.2703) loss: 0.6870 (0.6870) time: 6.1502 data: 6.0498 max mem: 9377 +Train: [54] [ 100/6250] eta: 0:21:53 lr: 0.000059 grad: 0.2872 (0.3243) loss: 0.6705 (0.7147) time: 0.1635 data: 0.0613 max mem: 9377 +Train: [54] [ 200/6250] eta: 0:18:50 lr: 0.000059 grad: 0.2923 (0.3043) loss: 0.6730 (0.7044) time: 0.1605 data: 0.0533 max mem: 9377 +Train: [54] [ 300/6250] eta: 0:17:42 lr: 0.000059 grad: 0.2567 (0.2901) loss: 0.6804 (0.6984) time: 0.1458 data: 0.0503 max mem: 9377 +Train: [54] [ 400/6250] eta: 0:16:52 lr: 0.000059 grad: 0.2599 (0.2862) loss: 0.6720 (0.6930) time: 0.1676 data: 0.0735 max mem: 9377 +Train: [54] [ 500/6250] eta: 0:16:16 lr: 0.000059 grad: 0.2127 (0.2771) loss: 0.6954 (0.6912) time: 0.1644 data: 0.0706 max mem: 9377 +Train: [54] [ 600/6250] eta: 0:15:53 lr: 0.000059 grad: 0.1905 (0.2633) loss: 0.6871 (0.6896) time: 0.1758 data: 0.0757 max mem: 9377 +Train: [54] [ 700/6250] eta: 0:15:26 lr: 0.000059 grad: 0.1865 (0.2529) loss: 0.6842 (0.6884) time: 0.1608 data: 0.0697 max mem: 9377 +Train: [54] [ 800/6250] eta: 0:15:05 lr: 0.000059 grad: 0.1799 (0.2447) loss: 0.6841 (0.6876) time: 0.1551 data: 0.0740 max mem: 9377 +Train: [54] [ 900/6250] eta: 0:14:48 lr: 0.000059 grad: 0.1783 (0.2381) loss: 0.6925 (0.6871) time: 0.1528 data: 0.0664 max mem: 9377 +Train: [54] [1000/6250] eta: 0:14:32 lr: 0.000059 grad: 0.1754 (0.2322) loss: 0.6614 (0.6867) time: 0.1665 data: 0.0760 max mem: 9377 +Train: [54] [1100/6250] eta: 0:14:19 lr: 0.000059 grad: 0.1703 (0.2271) loss: 0.6781 (0.6865) time: 0.1584 data: 0.0670 max mem: 9377 +Train: [54] [1200/6250] eta: 0:14:04 lr: 0.000059 grad: 0.1687 (0.2227) loss: 0.6741 (0.6862) time: 0.1839 data: 0.0924 max mem: 9377 +Train: [54] [1300/6250] eta: 0:13:42 lr: 0.000059 grad: 0.1664 (0.2186) loss: 0.6797 (0.6860) time: 0.1623 data: 0.0728 max mem: 9377 +Train: [54] [1400/6250] eta: 0:13:21 lr: 0.000059 grad: 0.1724 (0.2156) loss: 0.6760 (0.6859) time: 0.1373 data: 0.0463 max mem: 9377 +Train: [54] [1500/6250] eta: 0:13:00 lr: 0.000059 grad: 0.1710 (0.2126) loss: 0.6832 (0.6861) time: 0.1462 data: 0.0598 max mem: 9377 +Train: [54] [1600/6250] eta: 0:12:39 lr: 0.000059 grad: 0.1735 (0.2104) loss: 0.6627 (0.6857) time: 0.1438 data: 0.0593 max mem: 9377 +Train: [54] [1700/6250] eta: 0:12:21 lr: 0.000059 grad: 0.1715 (0.2081) loss: 0.6811 (0.6856) time: 0.1629 data: 0.0758 max mem: 9377 +Train: [54] [1800/6250] eta: 0:12:02 lr: 0.000059 grad: 0.1702 (0.2063) loss: 0.6804 (0.6858) time: 0.1474 data: 0.0600 max mem: 9377 +Train: [54] [1900/6250] eta: 0:11:43 lr: 0.000059 grad: 0.1672 (0.2044) loss: 0.6911 (0.6859) time: 0.1451 data: 0.0498 max mem: 9377 +Train: [54] [2000/6250] eta: 0:11:28 lr: 0.000059 grad: 0.1703 (0.2030) loss: 0.6831 (0.6859) time: 0.1524 data: 0.0629 max mem: 9377 +Train: [54] [2100/6250] eta: 0:11:11 lr: 0.000059 grad: 0.1727 (0.2018) loss: 0.6969 (0.6859) time: 0.1538 data: 0.0735 max mem: 9377 +Train: [54] [2200/6250] eta: 0:10:56 lr: 0.000059 grad: 0.1788 (0.2006) loss: 0.6647 (0.6856) time: 0.1565 data: 0.0723 max mem: 9377 +Train: [54] [2300/6250] eta: 0:10:40 lr: 0.000059 grad: 0.1790 (0.1996) loss: 0.6768 (0.6854) time: 0.1805 data: 0.0926 max mem: 9377 +Train: [54] [2400/6250] eta: 0:10:25 lr: 0.000059 grad: 0.1704 (0.1988) loss: 0.6818 (0.6852) time: 0.1845 data: 0.0907 max mem: 9377 +Train: [54] [2500/6250] eta: 0:10:09 lr: 0.000059 grad: 0.1757 (0.1980) loss: 0.6711 (0.6851) time: 0.1660 data: 0.0706 max mem: 9377 +Train: [54] [2600/6250] eta: 0:09:53 lr: 0.000059 grad: 0.1761 (0.1971) loss: 0.6651 (0.6850) time: 0.1666 data: 0.0738 max mem: 9377 +Train: [54] [2700/6250] eta: 0:09:36 lr: 0.000059 grad: 0.1707 (0.1962) loss: 0.6841 (0.6849) time: 0.1684 data: 0.0706 max mem: 9377 +Train: [54] [2800/6250] eta: 0:09:19 lr: 0.000058 grad: 0.1656 (0.1954) loss: 0.6779 (0.6849) time: 0.1521 data: 0.0499 max mem: 9377 +Train: [54] [2900/6250] eta: 0:09:02 lr: 0.000058 grad: 0.1744 (0.1947) loss: 0.6958 (0.6850) time: 0.1472 data: 0.0642 max mem: 9377 +Train: [54] [3000/6250] eta: 0:08:46 lr: 0.000058 grad: 0.1700 (0.1940) loss: 0.6863 (0.6850) time: 0.1604 data: 0.0764 max mem: 9377 +Train: [54] [3100/6250] eta: 0:08:29 lr: 0.000058 grad: 0.1739 (0.1934) loss: 0.6725 (0.6851) time: 0.1409 data: 0.0444 max mem: 9377 +Train: [54] [3200/6250] eta: 0:08:12 lr: 0.000058 grad: 0.1742 (0.1929) loss: 0.6853 (0.6852) time: 0.1657 data: 0.0710 max mem: 9377 +Train: [54] [3300/6250] eta: 0:07:56 lr: 0.000058 grad: 0.1724 (0.1923) loss: 0.6706 (0.6852) time: 0.1532 data: 0.0625 max mem: 9377 +Train: [54] [3400/6250] eta: 0:07:40 lr: 0.000058 grad: 0.1761 (0.1919) loss: 0.6775 (0.6852) time: 0.1469 data: 0.0588 max mem: 9377 +Train: [54] [3500/6250] eta: 0:07:24 lr: 0.000058 grad: 0.1743 (0.1915) loss: 0.6823 (0.6850) time: 0.1632 data: 0.0711 max mem: 9377 +Train: [54] [3600/6250] eta: 0:07:08 lr: 0.000058 grad: 0.1798 (0.1911) loss: 0.6831 (0.6850) time: 0.1687 data: 0.0790 max mem: 9377 +Train: [54] [3700/6250] eta: 0:06:53 lr: 0.000058 grad: 0.1773 (0.1908) loss: 0.6745 (0.6849) time: 0.2810 data: 0.1973 max mem: 9377 +Train: [54] [3800/6250] eta: 0:06:37 lr: 0.000058 grad: 0.1789 (0.1904) loss: 0.6754 (0.6848) time: 0.1917 data: 0.1053 max mem: 9377 +Train: [54] [3900/6250] eta: 0:06:21 lr: 0.000058 grad: 0.1698 (0.1901) loss: 0.6664 (0.6846) time: 0.1904 data: 0.1006 max mem: 9377 +Train: [54] [4000/6250] eta: 0:06:06 lr: 0.000058 grad: 0.1722 (0.1897) loss: 0.6671 (0.6845) time: 0.1677 data: 0.0802 max mem: 9377 +Train: [54] [4100/6250] eta: 0:05:49 lr: 0.000058 grad: 0.1739 (0.1893) loss: 0.6872 (0.6845) time: 0.1406 data: 0.0377 max mem: 9377 +Train: [54] [4200/6250] eta: 0:05:33 lr: 0.000058 grad: 0.1765 (0.1890) loss: 0.6926 (0.6846) time: 0.1669 data: 0.0817 max mem: 9377 +Train: [54] [4300/6250] eta: 0:05:16 lr: 0.000058 grad: 0.1702 (0.1886) loss: 0.6914 (0.6846) time: 0.1407 data: 0.0498 max mem: 9377 +Train: [54] [4400/6250] eta: 0:05:00 lr: 0.000058 grad: 0.1741 (0.1883) loss: 0.6810 (0.6846) time: 0.1536 data: 0.0644 max mem: 9377 +Train: [54] [4500/6250] eta: 0:04:44 lr: 0.000058 grad: 0.1739 (0.1880) loss: 0.6789 (0.6847) time: 0.1600 data: 0.0680 max mem: 9377 +Train: [54] [4600/6250] eta: 0:04:27 lr: 0.000058 grad: 0.1812 (0.1877) loss: 0.6996 (0.6848) time: 0.1455 data: 0.0539 max mem: 9377 +Train: [54] [4700/6250] eta: 0:04:11 lr: 0.000058 grad: 0.1720 (0.1874) loss: 0.6850 (0.6850) time: 0.1554 data: 0.0702 max mem: 9377 +Train: [54] [4800/6250] eta: 0:03:55 lr: 0.000058 grad: 0.1778 (0.1872) loss: 0.6754 (0.6850) time: 0.1303 data: 0.0464 max mem: 9377 +Train: [54] [4900/6250] eta: 0:03:39 lr: 0.000058 grad: 0.1701 (0.1869) loss: 0.6944 (0.6850) time: 0.1436 data: 0.0497 max mem: 9377 +Train: [54] [5000/6250] eta: 0:03:22 lr: 0.000058 grad: 0.1741 (0.1867) loss: 0.6933 (0.6850) time: 0.1493 data: 0.0657 max mem: 9377 +Train: [54] [5100/6250] eta: 0:03:06 lr: 0.000058 grad: 0.1712 (0.1864) loss: 0.6936 (0.6851) time: 0.1881 data: 0.0972 max mem: 9377 +Train: [54] [5200/6250] eta: 0:02:50 lr: 0.000058 grad: 0.1708 (0.1862) loss: 0.6825 (0.6852) time: 0.1659 data: 0.0691 max mem: 9377 +Train: [54] [5300/6250] eta: 0:02:33 lr: 0.000058 grad: 0.1715 (0.1860) loss: 0.6856 (0.6851) time: 0.1530 data: 0.0689 max mem: 9377 +Train: [54] [5400/6250] eta: 0:02:17 lr: 0.000058 grad: 0.1718 (0.1857) loss: 0.6980 (0.6852) time: 0.1648 data: 0.0764 max mem: 9377 +Train: [54] [5500/6250] eta: 0:02:01 lr: 0.000058 grad: 0.1731 (0.1855) loss: 0.6838 (0.6852) time: 0.1687 data: 0.0800 max mem: 9377 +Train: [54] [5600/6250] eta: 0:01:45 lr: 0.000058 grad: 0.1733 (0.1853) loss: 0.6892 (0.6851) time: 0.1572 data: 0.0620 max mem: 9377 +Train: [54] [5700/6250] eta: 0:01:28 lr: 0.000058 grad: 0.1754 (0.1851) loss: 0.6779 (0.6851) time: 0.1616 data: 0.0766 max mem: 9377 +Train: [54] [5800/6250] eta: 0:01:12 lr: 0.000057 grad: 0.1759 (0.1849) loss: 0.6765 (0.6850) time: 0.1420 data: 0.0528 max mem: 9377 +Train: [54] [5900/6250] eta: 0:00:56 lr: 0.000057 grad: 0.1714 (0.1848) loss: 0.6890 (0.6849) time: 0.1311 data: 0.0432 max mem: 9377 +Train: [54] [6000/6250] eta: 0:00:40 lr: 0.000057 grad: 0.1755 (0.1846) loss: 0.6837 (0.6850) time: 0.1542 data: 0.0607 max mem: 9377 +Train: [54] [6100/6250] eta: 0:00:24 lr: 0.000057 grad: 0.1757 (0.1845) loss: 0.6701 (0.6849) time: 0.1346 data: 0.0435 max mem: 9377 +Train: [54] [6200/6250] eta: 0:00:08 lr: 0.000057 grad: 0.1744 (0.1843) loss: 0.6886 (0.6849) time: 0.1316 data: 0.0463 max mem: 9377 +Train: [54] [6249/6250] eta: 0:00:00 lr: 0.000057 grad: 0.1730 (0.1843) loss: 0.6907 (0.6849) time: 0.1223 data: 0.0359 max mem: 9377 +Train: [54] Total time: 0:16:53 (0.1622 s / it) +Averaged stats: lr: 0.000057 grad: 0.1730 (0.1843) loss: 0.6907 (0.6849) +Eval (hcp-train-subset): [54] [ 0/62] eta: 0:06:31 loss: 0.8830 (0.8830) time: 6.3191 data: 6.2895 max mem: 9377 +Eval (hcp-train-subset): [54] [61/62] eta: 0:00:00 loss: 0.8981 (0.8977) time: 0.1397 data: 0.1149 max mem: 9377 +Eval (hcp-train-subset): [54] Total time: 0:00:14 (0.2347 s / it) +Averaged stats (hcp-train-subset): loss: 0.8981 (0.8977) +Making plots (hcp-train-subset): example=20 +Eval (hcp-val): [54] [ 0/62] eta: 0:06:08 loss: 0.8999 (0.8999) time: 5.9400 data: 5.8921 max mem: 9377 +Eval (hcp-val): [54] [61/62] eta: 0:00:00 loss: 0.8929 (0.8968) time: 0.1425 data: 0.1093 max mem: 9377 +Eval (hcp-val): [54] Total time: 0:00:15 (0.2428 s / it) +Averaged stats (hcp-val): loss: 0.8929 (0.8968) +Making plots (hcp-val): example=7 +Eval (nsd-val): [54] [ 0/62] eta: 0:05:48 loss: 0.8797 (0.8797) time: 5.6232 data: 5.5916 max mem: 9377 +Eval (nsd-val): [54] [61/62] eta: 0:00:00 loss: 0.8833 (0.8858) time: 0.1447 data: 0.1174 max mem: 9377 +Eval (nsd-val): [54] Total time: 0:00:14 (0.2366 s / it) +Averaged stats (nsd-val): loss: 0.8833 (0.8858) +Making plots (nsd-val): example=45 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-00054.pth +Train: [55] [ 0/6250] eta: 11:13:05 lr: 0.000057 grad: 0.2363 (0.2363) loss: 0.7290 (0.7290) time: 6.4617 data: 6.3155 max mem: 9377 +Train: [55] [ 100/6250] eta: 0:23:22 lr: 0.000057 grad: 0.2542 (0.3862) loss: 0.6682 (0.6696) time: 0.1669 data: 0.0490 max mem: 9377 +Train: [55] [ 200/6250] eta: 0:20:02 lr: 0.000057 grad: 0.3059 (0.3469) loss: 0.6246 (0.6648) time: 0.1856 data: 0.0942 max mem: 9377 +Train: [55] [ 300/6250] eta: 0:18:49 lr: 0.000057 grad: 0.2591 (0.3261) loss: 0.6362 (0.6620) time: 0.1818 data: 0.0824 max mem: 9377 +Train: [55] [ 400/6250] eta: 0:18:20 lr: 0.000057 grad: 0.2463 (0.3069) loss: 0.6601 (0.6603) time: 0.2022 data: 0.0984 max mem: 9377 +Train: [55] [ 500/6250] eta: 0:17:55 lr: 0.000057 grad: 0.2056 (0.2892) loss: 0.6676 (0.6618) time: 0.1904 data: 0.1029 max mem: 9377 +Train: [55] [ 600/6250] eta: 0:17:44 lr: 0.000057 grad: 0.1999 (0.2760) loss: 0.6822 (0.6642) time: 0.1630 data: 0.0757 max mem: 9377 +Train: [55] [ 700/6250] eta: 0:17:20 lr: 0.000057 grad: 0.1882 (0.2674) loss: 0.6889 (0.6658) time: 0.1824 data: 0.0836 max mem: 9377 +Train: [55] [ 800/6250] eta: 0:16:58 lr: 0.000057 grad: 0.1856 (0.2584) loss: 0.6613 (0.6668) time: 0.1934 data: 0.0977 max mem: 9377 +Train: [55] [ 900/6250] eta: 0:16:26 lr: 0.000057 grad: 0.1849 (0.2511) loss: 0.6816 (0.6679) time: 0.1285 data: 0.0368 max mem: 9377 +Train: [55] [1000/6250] eta: 0:15:58 lr: 0.000057 grad: 0.1821 (0.2445) loss: 0.6923 (0.6690) time: 0.1861 data: 0.0986 max mem: 9377 +Train: [55] [1100/6250] eta: 0:15:37 lr: 0.000057 grad: 0.1751 (0.2386) loss: 0.6826 (0.6704) time: 0.1644 data: 0.0726 max mem: 9377 +Train: [55] [1200/6250] eta: 0:15:15 lr: 0.000057 grad: 0.1764 (0.2334) loss: 0.6866 (0.6717) time: 0.1816 data: 0.0820 max mem: 9377 +Train: [55] [1300/6250] eta: 0:14:51 lr: 0.000057 grad: 0.1782 (0.2291) loss: 0.6726 (0.6725) time: 0.1758 data: 0.0899 max mem: 9377 +Train: [55] [1400/6250] eta: 0:14:26 lr: 0.000057 grad: 0.1808 (0.2256) loss: 0.6718 (0.6728) time: 0.1594 data: 0.0610 max mem: 9377 +Train: [55] [1500/6250] eta: 0:14:03 lr: 0.000057 grad: 0.1746 (0.2224) loss: 0.6842 (0.6733) time: 0.1750 data: 0.0849 max mem: 9377 +Train: [55] [1600/6250] eta: 0:13:40 lr: 0.000057 grad: 0.1816 (0.2202) loss: 0.6736 (0.6732) time: 0.1784 data: 0.0939 max mem: 9377 +Train: [55] [1700/6250] eta: 0:13:18 lr: 0.000057 grad: 0.1902 (0.2181) loss: 0.6585 (0.6732) time: 0.1818 data: 0.0962 max mem: 9377 +Train: [55] [1800/6250] eta: 0:12:56 lr: 0.000057 grad: 0.1779 (0.2161) loss: 0.6741 (0.6732) time: 0.1576 data: 0.0747 max mem: 9377 +Train: [55] [1900/6250] eta: 0:12:38 lr: 0.000057 grad: 0.1803 (0.2144) loss: 0.6637 (0.6731) time: 0.1467 data: 0.0563 max mem: 9377 +Train: [55] [2000/6250] eta: 0:12:19 lr: 0.000057 grad: 0.1826 (0.2128) loss: 0.6775 (0.6732) time: 0.1776 data: 0.0886 max mem: 9377 +Train: [55] [2100/6250] eta: 0:12:00 lr: 0.000057 grad: 0.1846 (0.2113) loss: 0.6712 (0.6736) time: 0.1799 data: 0.0835 max mem: 9377 +Train: [55] [2200/6250] eta: 0:11:40 lr: 0.000057 grad: 0.1773 (0.2099) loss: 0.6725 (0.6737) time: 0.1481 data: 0.0689 max mem: 9377 +Train: [55] [2300/6250] eta: 0:11:21 lr: 0.000057 grad: 0.1788 (0.2088) loss: 0.6704 (0.6739) time: 0.1693 data: 0.0836 max mem: 9377 +Train: [55] [2400/6250] eta: 0:11:04 lr: 0.000057 grad: 0.1801 (0.2074) loss: 0.6748 (0.6740) time: 0.1721 data: 0.0760 max mem: 9377 +Train: [55] [2500/6250] eta: 0:10:48 lr: 0.000057 grad: 0.1768 (0.2063) loss: 0.6842 (0.6740) time: 0.1789 data: 0.0967 max mem: 9377 +Train: [55] [2600/6250] eta: 0:10:29 lr: 0.000056 grad: 0.1714 (0.2052) loss: 0.6876 (0.6742) time: 0.1807 data: 0.0878 max mem: 9377 +Train: [55] [2700/6250] eta: 0:10:10 lr: 0.000056 grad: 0.1755 (0.2043) loss: 0.6676 (0.6743) time: 0.1536 data: 0.0641 max mem: 9377 +Train: [55] [2800/6250] eta: 0:09:51 lr: 0.000056 grad: 0.1789 (0.2033) loss: 0.6645 (0.6743) time: 0.1537 data: 0.0564 max mem: 9377 +Train: [55] [2900/6250] eta: 0:09:33 lr: 0.000056 grad: 0.1746 (0.2023) loss: 0.6750 (0.6745) time: 0.1577 data: 0.0617 max mem: 9377 +Train: [55] [3000/6250] eta: 0:09:14 lr: 0.000056 grad: 0.1744 (0.2014) loss: 0.6818 (0.6749) time: 0.1584 data: 0.0802 max mem: 9377 +Train: [55] [3100/6250] eta: 0:08:56 lr: 0.000056 grad: 0.1750 (0.2005) loss: 0.6849 (0.6753) time: 0.1590 data: 0.0737 max mem: 9377 +Train: [55] [3200/6250] eta: 0:08:39 lr: 0.000056 grad: 0.1744 (0.1998) loss: 0.6746 (0.6755) time: 0.1853 data: 0.1026 max mem: 9377 +Train: [55] [3300/6250] eta: 0:08:20 lr: 0.000056 grad: 0.1664 (0.1991) loss: 0.6958 (0.6758) time: 0.1617 data: 0.0753 max mem: 9377 +Train: [55] [3400/6250] eta: 0:08:03 lr: 0.000056 grad: 0.1793 (0.1984) loss: 0.6724 (0.6762) time: 0.1621 data: 0.0802 max mem: 9377 +Train: [55] [3500/6250] eta: 0:07:45 lr: 0.000056 grad: 0.1767 (0.1978) loss: 0.6808 (0.6765) time: 0.1569 data: 0.0655 max mem: 9377 +Train: [55] [3600/6250] eta: 0:07:28 lr: 0.000056 grad: 0.1697 (0.1972) loss: 0.6923 (0.6768) time: 0.1933 data: 0.0989 max mem: 9377 +Train: [55] [3700/6250] eta: 0:07:10 lr: 0.000056 grad: 0.1700 (0.1967) loss: 0.7006 (0.6771) time: 0.1457 data: 0.0663 max mem: 9377 +Train: [55] [3800/6250] eta: 0:06:52 lr: 0.000056 grad: 0.1744 (0.1962) loss: 0.6892 (0.6773) time: 0.1626 data: 0.0744 max mem: 9377 +Train: [55] [3900/6250] eta: 0:06:35 lr: 0.000056 grad: 0.1727 (0.1957) loss: 0.6916 (0.6775) time: 0.1658 data: 0.0775 max mem: 9377 +Train: [55] [4000/6250] eta: 0:06:17 lr: 0.000056 grad: 0.1704 (0.1952) loss: 0.6832 (0.6778) time: 0.1407 data: 0.0482 max mem: 9377 +Train: [55] [4100/6250] eta: 0:06:00 lr: 0.000056 grad: 0.1792 (0.1948) loss: 0.6746 (0.6779) time: 0.1399 data: 0.0456 max mem: 9377 +Train: [55] [4200/6250] eta: 0:05:43 lr: 0.000056 grad: 0.1816 (0.1944) loss: 0.6795 (0.6781) time: 0.1413 data: 0.0404 max mem: 9377 +Train: [55] [4300/6250] eta: 0:05:25 lr: 0.000056 grad: 0.1863 (0.1941) loss: 0.6669 (0.6781) time: 0.1526 data: 0.0581 max mem: 9377 +Train: [55] [4400/6250] eta: 0:05:08 lr: 0.000056 grad: 0.1789 (0.1937) loss: 0.6673 (0.6781) time: 0.1618 data: 0.0729 max mem: 9377 +Train: [55] [4500/6250] eta: 0:04:52 lr: 0.000056 grad: 0.1748 (0.1934) loss: 0.6903 (0.6781) time: 0.1698 data: 0.0734 max mem: 9377 +Train: [55] [4600/6250] eta: 0:04:35 lr: 0.000056 grad: 0.1810 (0.1931) loss: 0.6717 (0.6780) time: 0.1275 data: 0.0329 max mem: 9377 +Train: [55] [4700/6250] eta: 0:04:17 lr: 0.000056 grad: 0.1800 (0.1928) loss: 0.6777 (0.6779) time: 0.1526 data: 0.0577 max mem: 9377 +Train: [55] [4800/6250] eta: 0:04:00 lr: 0.000056 grad: 0.1747 (0.1925) loss: 0.6630 (0.6778) time: 0.1411 data: 0.0542 max mem: 9377 +Train: [55] [4900/6250] eta: 0:03:43 lr: 0.000056 grad: 0.1785 (0.1922) loss: 0.6636 (0.6778) time: 0.1660 data: 0.0782 max mem: 9377 +Train: [55] [5000/6250] eta: 0:03:27 lr: 0.000056 grad: 0.1735 (0.1919) loss: 0.6781 (0.6778) time: 0.1708 data: 0.0820 max mem: 9377 +Train: [55] [5100/6250] eta: 0:03:10 lr: 0.000056 grad: 0.1715 (0.1915) loss: 0.6777 (0.6778) time: 0.1624 data: 0.0806 max mem: 9377 +Train: [55] [5200/6250] eta: 0:02:53 lr: 0.000056 grad: 0.1822 (0.1913) loss: 0.6739 (0.6778) time: 0.1732 data: 0.0895 max mem: 9377 +Train: [55] [5300/6250] eta: 0:02:36 lr: 0.000056 grad: 0.1735 (0.1910) loss: 0.6757 (0.6779) time: 0.1564 data: 0.0697 max mem: 9377 +Train: [55] [5400/6250] eta: 0:02:20 lr: 0.000056 grad: 0.1719 (0.1906) loss: 0.6916 (0.6779) time: 0.1765 data: 0.0910 max mem: 9377 +Train: [55] [5500/6250] eta: 0:02:03 lr: 0.000056 grad: 0.1665 (0.1903) loss: 0.6778 (0.6780) time: 0.1389 data: 0.0361 max mem: 9377 +Train: [55] [5600/6250] eta: 0:01:46 lr: 0.000055 grad: 0.1752 (0.1901) loss: 0.6746 (0.6781) time: 0.1663 data: 0.0790 max mem: 9377 +Train: [55] [5700/6250] eta: 0:01:30 lr: 0.000055 grad: 0.1731 (0.1898) loss: 0.6832 (0.6781) time: 0.1414 data: 0.0527 max mem: 9377 +Train: [55] [5800/6250] eta: 0:01:13 lr: 0.000055 grad: 0.1756 (0.1896) loss: 0.6804 (0.6783) time: 0.1647 data: 0.0770 max mem: 9377 +Train: [55] [5900/6250] eta: 0:00:57 lr: 0.000055 grad: 0.1729 (0.1894) loss: 0.6691 (0.6784) time: 0.1426 data: 0.0497 max mem: 9377 +Train: [55] [6000/6250] eta: 0:00:40 lr: 0.000055 grad: 0.1763 (0.1892) loss: 0.6724 (0.6784) time: 0.1644 data: 0.0717 max mem: 9377 +Train: [55] [6100/6250] eta: 0:00:24 lr: 0.000055 grad: 0.1720 (0.1889) loss: 0.6802 (0.6786) time: 0.1647 data: 0.0730 max mem: 9377 +Train: [55] [6200/6250] eta: 0:00:08 lr: 0.000055 grad: 0.1707 (0.1887) loss: 0.6875 (0.6787) time: 0.1947 data: 0.0996 max mem: 9377 +Train: [55] [6249/6250] eta: 0:00:00 lr: 0.000055 grad: 0.1721 (0.1886) loss: 0.6792 (0.6787) time: 0.2107 data: 0.1256 max mem: 9377 +Train: [55] Total time: 0:17:12 (0.1652 s / it) +Averaged stats: lr: 0.000055 grad: 0.1721 (0.1886) loss: 0.6792 (0.6787) +Eval (hcp-train-subset): [55] [ 0/62] eta: 0:04:49 loss: 0.8908 (0.8908) time: 4.6675 data: 4.5933 max mem: 9377 +Eval (hcp-train-subset): [55] [61/62] eta: 0:00:00 loss: 0.8965 (0.8963) time: 0.1574 data: 0.1276 max mem: 9377 +Eval (hcp-train-subset): [55] Total time: 0:00:16 (0.2629 s / it) +Averaged stats (hcp-train-subset): loss: 0.8965 (0.8963) +Eval (hcp-val): [55] [ 0/62] eta: 0:04:55 loss: 0.8947 (0.8947) time: 4.7610 data: 4.7020 max mem: 9377 +Eval (hcp-val): [55] [61/62] eta: 0:00:00 loss: 0.8941 (0.8968) time: 0.1475 data: 0.1217 max mem: 9377 +Eval (hcp-val): [55] Total time: 0:00:15 (0.2580 s / it) +Averaged stats (hcp-val): loss: 0.8941 (0.8968) +Eval (nsd-val): [55] [ 0/62] eta: 0:05:10 loss: 0.8752 (0.8752) time: 5.0080 data: 4.9267 max mem: 9377 +Eval (nsd-val): [55] [61/62] eta: 0:00:00 loss: 0.8849 (0.8850) time: 0.1541 data: 0.1283 max mem: 9377 +Eval (nsd-val): [55] Total time: 0:00:15 (0.2548 s / it) +Averaged stats (nsd-val): loss: 0.8849 (0.8850) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [56] [ 0/6250] eta: 9:36:05 lr: 0.000055 grad: 0.2075 (0.2075) loss: 0.7047 (0.7047) time: 5.5305 data: 5.0845 max mem: 9377 +Train: [56] [ 100/6250] eta: 0:25:44 lr: 0.000055 grad: 0.3106 (0.3409) loss: 0.7141 (0.7260) time: 0.1810 data: 0.0672 max mem: 9377 +Train: [56] [ 200/6250] eta: 0:22:38 lr: 0.000055 grad: 0.3403 (0.3498) loss: 0.6778 (0.7097) time: 0.2020 data: 0.0948 max mem: 9377 +Train: [56] [ 300/6250] eta: 0:20:59 lr: 0.000055 grad: 0.2187 (0.3203) loss: 0.6864 (0.7032) time: 0.1947 data: 0.0879 max mem: 9377 +Train: [56] [ 400/6250] eta: 0:20:01 lr: 0.000055 grad: 0.2191 (0.2975) loss: 0.6921 (0.6991) time: 0.1722 data: 0.0690 max mem: 9377 +Train: [56] [ 500/6250] eta: 0:19:02 lr: 0.000055 grad: 0.2123 (0.2813) loss: 0.6903 (0.6972) time: 0.1566 data: 0.0521 max mem: 9377 +Train: [56] [ 600/6250] eta: 0:18:26 lr: 0.000055 grad: 0.2218 (0.2719) loss: 0.6869 (0.6954) time: 0.1842 data: 0.0981 max mem: 9377 +Train: [56] [ 700/6250] eta: 0:18:03 lr: 0.000055 grad: 0.2160 (0.2638) loss: 0.6772 (0.6942) time: 0.1711 data: 0.0769 max mem: 9377 +Train: [56] [ 800/6250] eta: 0:17:29 lr: 0.000055 grad: 0.1796 (0.2551) loss: 0.6864 (0.6935) time: 0.1716 data: 0.0822 max mem: 9377 +Train: [56] [ 900/6250] eta: 0:17:01 lr: 0.000055 grad: 0.1886 (0.2477) loss: 0.6749 (0.6922) time: 0.1766 data: 0.0873 max mem: 9377 +Train: [56] [1000/6250] eta: 0:16:25 lr: 0.000055 grad: 0.1813 (0.2415) loss: 0.6765 (0.6909) time: 0.1400 data: 0.0399 max mem: 9377 +Train: [56] [1100/6250] eta: 0:16:03 lr: 0.000055 grad: 0.1776 (0.2360) loss: 0.6761 (0.6897) time: 0.1759 data: 0.0829 max mem: 9377 +Train: [56] [1200/6250] eta: 0:15:36 lr: 0.000055 grad: 0.1771 (0.2314) loss: 0.6531 (0.6881) time: 0.1859 data: 0.0994 max mem: 9377 +Train: [56] [1300/6250] eta: 0:15:10 lr: 0.000055 grad: 0.1762 (0.2274) loss: 0.6748 (0.6871) time: 0.1611 data: 0.0796 max mem: 9377 +Train: [56] [1400/6250] eta: 0:14:46 lr: 0.000055 grad: 0.1818 (0.2239) loss: 0.6609 (0.6863) time: 0.1818 data: 0.0978 max mem: 9377 +Train: [56] [1500/6250] eta: 0:14:18 lr: 0.000055 grad: 0.1747 (0.2212) loss: 0.6847 (0.6853) time: 0.1531 data: 0.0548 max mem: 9377 +Train: [56] [1600/6250] eta: 0:13:55 lr: 0.000055 grad: 0.1762 (0.2188) loss: 0.6669 (0.6844) time: 0.1605 data: 0.0655 max mem: 9377 +Train: [56] [1700/6250] eta: 0:13:29 lr: 0.000055 grad: 0.1725 (0.2162) loss: 0.6817 (0.6837) time: 0.1377 data: 0.0461 max mem: 9377 +Train: [56] [1800/6250] eta: 0:13:10 lr: 0.000055 grad: 0.1722 (0.2141) loss: 0.6721 (0.6832) time: 0.1720 data: 0.0875 max mem: 9377 +Train: [56] [1900/6250] eta: 0:12:48 lr: 0.000055 grad: 0.1772 (0.2122) loss: 0.6818 (0.6825) time: 0.1715 data: 0.0881 max mem: 9377 +Train: [56] [2000/6250] eta: 0:12:30 lr: 0.000055 grad: 0.1786 (0.2105) loss: 0.6704 (0.6819) time: 0.1621 data: 0.0781 max mem: 9377 +Train: [56] [2100/6250] eta: 0:12:09 lr: 0.000055 grad: 0.1767 (0.2091) loss: 0.6592 (0.6814) time: 0.1612 data: 0.0731 max mem: 9377 +Train: [56] [2200/6250] eta: 0:11:50 lr: 0.000055 grad: 0.1821 (0.2077) loss: 0.6757 (0.6809) time: 0.1956 data: 0.1063 max mem: 9377 +Train: [56] [2300/6250] eta: 0:11:28 lr: 0.000055 grad: 0.1790 (0.2066) loss: 0.6533 (0.6805) time: 0.1760 data: 0.0863 max mem: 9377 +Train: [56] [2400/6250] eta: 0:11:08 lr: 0.000054 grad: 0.1786 (0.2054) loss: 0.6597 (0.6802) time: 0.1555 data: 0.0564 max mem: 9377 +Train: [56] [2500/6250] eta: 0:10:49 lr: 0.000054 grad: 0.1810 (0.2045) loss: 0.6781 (0.6800) time: 0.1614 data: 0.0760 max mem: 9377 +Train: [56] [2600/6250] eta: 0:10:31 lr: 0.000054 grad: 0.1782 (0.2036) loss: 0.6812 (0.6802) time: 0.1446 data: 0.0642 max mem: 9377 +Train: [56] [2700/6250] eta: 0:10:11 lr: 0.000054 grad: 0.1763 (0.2026) loss: 0.6759 (0.6803) time: 0.1537 data: 0.0784 max mem: 9377 +Train: [56] [2800/6250] eta: 0:09:51 lr: 0.000054 grad: 0.1727 (0.2017) loss: 0.6801 (0.6803) time: 0.1571 data: 0.0703 max mem: 9377 +Train: [56] [2900/6250] eta: 0:09:31 lr: 0.000054 grad: 0.1756 (0.2009) loss: 0.6746 (0.6801) time: 0.1554 data: 0.0660 max mem: 9377 +Train: [56] [3000/6250] eta: 0:09:12 lr: 0.000054 grad: 0.1830 (0.2002) loss: 0.6762 (0.6801) time: 0.1665 data: 0.0740 max mem: 9377 +Train: [56] [3100/6250] eta: 0:08:53 lr: 0.000054 grad: 0.1747 (0.1995) loss: 0.6770 (0.6802) time: 0.1651 data: 0.0665 max mem: 9377 +Train: [56] [3200/6250] eta: 0:08:34 lr: 0.000054 grad: 0.1795 (0.1989) loss: 0.6809 (0.6801) time: 0.1492 data: 0.0675 max mem: 9377 +Train: [56] [3300/6250] eta: 0:08:16 lr: 0.000054 grad: 0.1744 (0.1983) loss: 0.6735 (0.6801) time: 0.2028 data: 0.1158 max mem: 9377 +Train: [56] [3400/6250] eta: 0:07:57 lr: 0.000054 grad: 0.1757 (0.1976) loss: 0.6827 (0.6800) time: 0.1290 data: 0.0379 max mem: 9377 +Train: [56] [3500/6250] eta: 0:07:39 lr: 0.000054 grad: 0.1717 (0.1971) loss: 0.6710 (0.6799) time: 0.1711 data: 0.0949 max mem: 9377 +Train: [56] [3600/6250] eta: 0:07:22 lr: 0.000054 grad: 0.1785 (0.1966) loss: 0.6723 (0.6798) time: 0.1505 data: 0.0715 max mem: 9377 +Train: [56] [3700/6250] eta: 0:07:05 lr: 0.000054 grad: 0.1805 (0.1961) loss: 0.6721 (0.6797) time: 0.1544 data: 0.0725 max mem: 9377 +Train: [56] [3800/6250] eta: 0:06:49 lr: 0.000054 grad: 0.1793 (0.1957) loss: 0.6655 (0.6796) time: 0.1409 data: 0.0563 max mem: 9377 +Train: [56] [3900/6250] eta: 0:06:33 lr: 0.000054 grad: 0.1776 (0.1952) loss: 0.6812 (0.6795) time: 0.1837 data: 0.0925 max mem: 9377 +Train: [56] [4000/6250] eta: 0:06:17 lr: 0.000054 grad: 0.1687 (0.1948) loss: 0.6954 (0.6796) time: 0.1660 data: 0.0803 max mem: 9377 +Train: [56] [4100/6250] eta: 0:06:00 lr: 0.000054 grad: 0.1704 (0.1944) loss: 0.6832 (0.6795) time: 0.1225 data: 0.0279 max mem: 9377 +Train: [56] [4200/6250] eta: 0:05:43 lr: 0.000054 grad: 0.1782 (0.1940) loss: 0.6781 (0.6794) time: 0.1390 data: 0.0564 max mem: 9377 +Train: [56] [4300/6250] eta: 0:05:26 lr: 0.000054 grad: 0.1773 (0.1936) loss: 0.6797 (0.6794) time: 0.1796 data: 0.0962 max mem: 9377 +Train: [56] [4400/6250] eta: 0:05:09 lr: 0.000054 grad: 0.1717 (0.1933) loss: 0.6836 (0.6793) time: 0.1766 data: 0.0995 max mem: 9377 +Train: [56] [4500/6250] eta: 0:04:52 lr: 0.000054 grad: 0.1718 (0.1929) loss: 0.6793 (0.6794) time: 0.1792 data: 0.0950 max mem: 9377 +Train: [56] [4600/6250] eta: 0:04:36 lr: 0.000054 grad: 0.1727 (0.1926) loss: 0.6728 (0.6793) time: 0.1918 data: 0.1069 max mem: 9377 +Train: [56] [4700/6250] eta: 0:04:19 lr: 0.000054 grad: 0.1792 (0.1923) loss: 0.6730 (0.6792) time: 0.1700 data: 0.0953 max mem: 9377 +Train: [56] [4800/6250] eta: 0:04:02 lr: 0.000054 grad: 0.1798 (0.1921) loss: 0.6843 (0.6792) time: 0.1573 data: 0.0707 max mem: 9377 +Train: [56] [4900/6250] eta: 0:03:45 lr: 0.000054 grad: 0.1822 (0.1920) loss: 0.6703 (0.6792) time: 0.1683 data: 0.0843 max mem: 9377 +Train: [56] [5000/6250] eta: 0:03:29 lr: 0.000054 grad: 0.1738 (0.1917) loss: 0.6868 (0.6791) time: 0.2097 data: 0.1312 max mem: 9377 +Train: [56] [5100/6250] eta: 0:03:12 lr: 0.000054 grad: 0.1696 (0.1914) loss: 0.6820 (0.6790) time: 0.1466 data: 0.0642 max mem: 9377 +Train: [56] [5200/6250] eta: 0:02:55 lr: 0.000054 grad: 0.1718 (0.1911) loss: 0.6820 (0.6790) time: 0.1421 data: 0.0587 max mem: 9377 +Train: [56] [5300/6250] eta: 0:02:38 lr: 0.000054 grad: 0.1815 (0.1909) loss: 0.6585 (0.6789) time: 0.1750 data: 0.0931 max mem: 9377 +Train: [56] [5400/6250] eta: 0:02:21 lr: 0.000054 grad: 0.1795 (0.1907) loss: 0.6725 (0.6787) time: 0.1474 data: 0.0537 max mem: 9377 +Train: [56] [5500/6250] eta: 0:02:05 lr: 0.000053 grad: 0.1759 (0.1904) loss: 0.6770 (0.6786) time: 0.1744 data: 0.0878 max mem: 9377 +Train: [56] [5600/6250] eta: 0:01:48 lr: 0.000053 grad: 0.1774 (0.1902) loss: 0.6685 (0.6785) time: 0.1750 data: 0.0953 max mem: 9377 +Train: [56] [5700/6250] eta: 0:01:31 lr: 0.000053 grad: 0.1751 (0.1900) loss: 0.6715 (0.6784) time: 0.1617 data: 0.0709 max mem: 9377 +Train: [56] [5800/6250] eta: 0:01:14 lr: 0.000053 grad: 0.1683 (0.1898) loss: 0.6817 (0.6783) time: 0.1536 data: 0.0713 max mem: 9377 +Train: [56] [5900/6250] eta: 0:00:58 lr: 0.000053 grad: 0.1771 (0.1896) loss: 0.6670 (0.6783) time: 0.1538 data: 0.0645 max mem: 9377 +Train: [56] [6000/6250] eta: 0:00:41 lr: 0.000053 grad: 0.1755 (0.1896) loss: 0.6642 (0.6781) time: 0.1551 data: 0.0685 max mem: 9377 +Train: [56] [6100/6250] eta: 0:00:24 lr: 0.000053 grad: 0.1802 (0.1894) loss: 0.6792 (0.6779) time: 0.1537 data: 0.0611 max mem: 9377 +Train: [56] [6200/6250] eta: 0:00:08 lr: 0.000053 grad: 0.1822 (0.1893) loss: 0.6628 (0.6779) time: 0.1428 data: 0.0469 max mem: 9377 +Train: [56] [6249/6250] eta: 0:00:00 lr: 0.000053 grad: 0.1745 (0.1892) loss: 0.6869 (0.6779) time: 0.1509 data: 0.0560 max mem: 9377 +Train: [56] Total time: 0:17:23 (0.1669 s / it) +Averaged stats: lr: 0.000053 grad: 0.1745 (0.1892) loss: 0.6869 (0.6779) +Eval (hcp-train-subset): [56] [ 0/62] eta: 0:06:12 loss: 0.8880 (0.8880) time: 6.0151 data: 5.9836 max mem: 9377 +Eval (hcp-train-subset): [56] [61/62] eta: 0:00:00 loss: 0.8951 (0.8969) time: 0.1233 data: 0.0964 max mem: 9377 +Eval (hcp-train-subset): [56] Total time: 0:00:14 (0.2388 s / it) +Averaged stats (hcp-train-subset): loss: 0.8951 (0.8969) +Eval (hcp-val): [56] [ 0/62] eta: 0:06:05 loss: 0.8996 (0.8996) time: 5.9027 data: 5.8716 max mem: 9377 +Eval (hcp-val): [56] [61/62] eta: 0:00:00 loss: 0.8940 (0.8963) time: 0.1218 data: 0.0968 max mem: 9377 +Eval (hcp-val): [56] Total time: 0:00:14 (0.2393 s / it) +Averaged stats (hcp-val): loss: 0.8940 (0.8963) +Eval (nsd-val): [56] [ 0/62] eta: 0:05:29 loss: 0.8836 (0.8836) time: 5.3140 data: 5.2779 max mem: 9377 +Eval (nsd-val): [56] [61/62] eta: 0:00:00 loss: 0.8893 (0.8914) time: 0.1483 data: 0.1227 max mem: 9377 +Eval (nsd-val): [56] Total time: 0:00:14 (0.2370 s / it) +Averaged stats (nsd-val): loss: 0.8893 (0.8914) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [57] [ 0/6250] eta: 9:53:57 lr: 0.000053 grad: 0.1795 (0.1795) loss: 0.7270 (0.7270) time: 5.7020 data: 5.5341 max mem: 9377 +Train: [57] [ 100/6250] eta: 0:22:25 lr: 0.000053 grad: 0.3627 (0.3605) loss: 0.6898 (0.7106) time: 0.1852 data: 0.0755 max mem: 9377 +Train: [57] [ 200/6250] eta: 0:20:01 lr: 0.000053 grad: 0.3090 (0.3593) loss: 0.6907 (0.7011) time: 0.1878 data: 0.0822 max mem: 9377 +Train: [57] [ 300/6250] eta: 0:18:42 lr: 0.000053 grad: 0.2430 (0.3347) loss: 0.6911 (0.6997) time: 0.1782 data: 0.0843 max mem: 9377 +Train: [57] [ 400/6250] eta: 0:17:52 lr: 0.000053 grad: 0.2335 (0.3105) loss: 0.6840 (0.6981) time: 0.1606 data: 0.0663 max mem: 9377 +Train: [57] [ 500/6250] eta: 0:16:59 lr: 0.000053 grad: 0.1957 (0.2921) loss: 0.6749 (0.6955) time: 0.1608 data: 0.0650 max mem: 9377 +Train: [57] [ 600/6250] eta: 0:16:28 lr: 0.000053 grad: 0.1905 (0.2761) loss: 0.6902 (0.6943) time: 0.1772 data: 0.0823 max mem: 9377 +Train: [57] [ 700/6250] eta: 0:15:58 lr: 0.000053 grad: 0.1991 (0.2641) loss: 0.6900 (0.6927) time: 0.1411 data: 0.0562 max mem: 9377 +Train: [57] [ 800/6250] eta: 0:15:45 lr: 0.000053 grad: 0.1908 (0.2557) loss: 0.6689 (0.6912) time: 0.1629 data: 0.0682 max mem: 9377 +Train: [57] [ 900/6250] eta: 0:15:20 lr: 0.000053 grad: 0.1811 (0.2479) loss: 0.6706 (0.6897) time: 0.1639 data: 0.0766 max mem: 9377 +Train: [57] [1000/6250] eta: 0:15:01 lr: 0.000053 grad: 0.1847 (0.2413) loss: 0.6581 (0.6879) time: 0.1578 data: 0.0761 max mem: 9377 +Train: [57] [1100/6250] eta: 0:14:37 lr: 0.000053 grad: 0.1800 (0.2359) loss: 0.6751 (0.6866) time: 0.1565 data: 0.0603 max mem: 9377 +Train: [57] [1200/6250] eta: 0:14:19 lr: 0.000053 grad: 0.1800 (0.2315) loss: 0.6652 (0.6852) time: 0.1564 data: 0.0714 max mem: 9377 +Train: [57] [1300/6250] eta: 0:13:56 lr: 0.000053 grad: 0.1713 (0.2276) loss: 0.6708 (0.6843) time: 0.1604 data: 0.0773 max mem: 9377 +Train: [57] [1400/6250] eta: 0:13:37 lr: 0.000053 grad: 0.1828 (0.2243) loss: 0.6784 (0.6837) time: 0.1501 data: 0.0595 max mem: 9377 +Train: [57] [1500/6250] eta: 0:13:18 lr: 0.000053 grad: 0.1745 (0.2215) loss: 0.6760 (0.6833) time: 0.1689 data: 0.0780 max mem: 9377 +Train: [57] [1600/6250] eta: 0:12:57 lr: 0.000053 grad: 0.1725 (0.2187) loss: 0.6788 (0.6832) time: 0.1587 data: 0.0665 max mem: 9377 +Train: [57] [1700/6250] eta: 0:12:36 lr: 0.000053 grad: 0.1798 (0.2164) loss: 0.6783 (0.6830) time: 0.1427 data: 0.0637 max mem: 9377 +Train: [57] [1800/6250] eta: 0:12:16 lr: 0.000053 grad: 0.1796 (0.2145) loss: 0.6838 (0.6828) time: 0.1456 data: 0.0564 max mem: 9377 +Train: [57] [1900/6250] eta: 0:11:57 lr: 0.000053 grad: 0.1816 (0.2126) loss: 0.6747 (0.6827) time: 0.1606 data: 0.0672 max mem: 9377 +Train: [57] [2000/6250] eta: 0:11:40 lr: 0.000053 grad: 0.1784 (0.2111) loss: 0.6687 (0.6825) time: 0.1626 data: 0.0748 max mem: 9377 +Train: [57] [2100/6250] eta: 0:11:23 lr: 0.000053 grad: 0.1840 (0.2099) loss: 0.6858 (0.6823) time: 0.1399 data: 0.0454 max mem: 9377 +Train: [57] [2200/6250] eta: 0:11:06 lr: 0.000053 grad: 0.1711 (0.2085) loss: 0.6872 (0.6822) time: 0.1667 data: 0.0780 max mem: 9377 +Train: [57] [2300/6250] eta: 0:10:49 lr: 0.000052 grad: 0.1759 (0.2075) loss: 0.6831 (0.6819) time: 0.1554 data: 0.0666 max mem: 9377 +Train: [57] [2400/6250] eta: 0:10:32 lr: 0.000052 grad: 0.1841 (0.2064) loss: 0.6679 (0.6815) time: 0.1634 data: 0.0721 max mem: 9377 +Train: [57] [2500/6250] eta: 0:10:15 lr: 0.000052 grad: 0.1843 (0.2056) loss: 0.6697 (0.6810) time: 0.1658 data: 0.0828 max mem: 9377 +Train: [57] [2600/6250] eta: 0:09:58 lr: 0.000052 grad: 0.1806 (0.2046) loss: 0.6708 (0.6804) time: 0.1481 data: 0.0558 max mem: 9377 +Train: [57] [2700/6250] eta: 0:09:41 lr: 0.000052 grad: 0.1795 (0.2038) loss: 0.6646 (0.6799) time: 0.1400 data: 0.0490 max mem: 9377 +Train: [57] [2800/6250] eta: 0:09:23 lr: 0.000052 grad: 0.1785 (0.2030) loss: 0.6701 (0.6795) time: 0.1456 data: 0.0445 max mem: 9377 +Train: [57] [2900/6250] eta: 0:09:05 lr: 0.000052 grad: 0.1729 (0.2025) loss: 0.6889 (0.6793) time: 0.1573 data: 0.0682 max mem: 9377 +Train: [57] [3000/6250] eta: 0:08:48 lr: 0.000052 grad: 0.1761 (0.2018) loss: 0.6667 (0.6789) time: 0.1295 data: 0.0478 max mem: 9377 +Train: [57] [3100/6250] eta: 0:08:30 lr: 0.000052 grad: 0.1719 (0.2012) loss: 0.6828 (0.6788) time: 0.1408 data: 0.0444 max mem: 9377 +Train: [57] [3200/6250] eta: 0:08:13 lr: 0.000052 grad: 0.1774 (0.2006) loss: 0.6680 (0.6786) time: 0.1585 data: 0.0624 max mem: 9377 +Train: [57] [3300/6250] eta: 0:07:56 lr: 0.000052 grad: 0.1877 (0.2000) loss: 0.6604 (0.6783) time: 0.1397 data: 0.0543 max mem: 9377 +Train: [57] [3400/6250] eta: 0:07:39 lr: 0.000052 grad: 0.1851 (0.1994) loss: 0.6666 (0.6780) time: 0.1403 data: 0.0396 max mem: 9377 +Train: [57] [3500/6250] eta: 0:07:22 lr: 0.000052 grad: 0.1837 (0.1990) loss: 0.6746 (0.6778) time: 0.1507 data: 0.0580 max mem: 9377 +Train: [57] [3600/6250] eta: 0:07:07 lr: 0.000052 grad: 0.1823 (0.1987) loss: 0.6714 (0.6776) time: 0.1672 data: 0.0923 max mem: 9377 +Train: [57] [3700/6250] eta: 0:06:51 lr: 0.000052 grad: 0.1783 (0.1981) loss: 0.6687 (0.6775) time: 0.1696 data: 0.0800 max mem: 9377 +Train: [57] [3800/6250] eta: 0:06:35 lr: 0.000052 grad: 0.1767 (0.1977) loss: 0.6571 (0.6773) time: 0.1696 data: 0.0859 max mem: 9377 +Train: [57] [3900/6250] eta: 0:06:19 lr: 0.000052 grad: 0.1786 (0.1972) loss: 0.6701 (0.6771) time: 0.1421 data: 0.0551 max mem: 9377 +Train: [57] [4000/6250] eta: 0:06:03 lr: 0.000052 grad: 0.1806 (0.1968) loss: 0.6621 (0.6769) time: 0.1606 data: 0.0711 max mem: 9377 +Train: [57] [4100/6250] eta: 0:05:47 lr: 0.000052 grad: 0.1753 (0.1964) loss: 0.6742 (0.6768) time: 0.1638 data: 0.0823 max mem: 9377 +Train: [57] [4200/6250] eta: 0:05:31 lr: 0.000052 grad: 0.1739 (0.1959) loss: 0.6741 (0.6768) time: 0.1673 data: 0.0719 max mem: 9377 +Train: [57] [4300/6250] eta: 0:05:14 lr: 0.000052 grad: 0.1745 (0.1956) loss: 0.6701 (0.6767) time: 0.1618 data: 0.0659 max mem: 9377 +Train: [57] [4400/6250] eta: 0:04:58 lr: 0.000052 grad: 0.1858 (0.1953) loss: 0.6764 (0.6767) time: 0.1392 data: 0.0441 max mem: 9377 +Train: [57] [4500/6250] eta: 0:04:41 lr: 0.000052 grad: 0.1788 (0.1949) loss: 0.6742 (0.6767) time: 0.1561 data: 0.0635 max mem: 9377 +Train: [57] [4600/6250] eta: 0:04:25 lr: 0.000052 grad: 0.1711 (0.1946) loss: 0.6935 (0.6768) time: 0.1671 data: 0.0781 max mem: 9377 +Train: [57] [4700/6250] eta: 0:04:08 lr: 0.000052 grad: 0.1784 (0.1943) loss: 0.6731 (0.6769) time: 0.1759 data: 0.0935 max mem: 9377 +Train: [57] [4800/6250] eta: 0:03:52 lr: 0.000052 grad: 0.1788 (0.1941) loss: 0.6835 (0.6769) time: 0.1574 data: 0.0628 max mem: 9377 +Train: [57] [4900/6250] eta: 0:03:35 lr: 0.000052 grad: 0.1844 (0.1940) loss: 0.6791 (0.6769) time: 0.1663 data: 0.0795 max mem: 9377 +Train: [57] [5000/6250] eta: 0:03:19 lr: 0.000052 grad: 0.1717 (0.1939) loss: 0.6812 (0.6767) time: 0.1609 data: 0.0666 max mem: 9377 +Train: [57] [5100/6250] eta: 0:03:03 lr: 0.000052 grad: 0.1821 (0.1937) loss: 0.6636 (0.6766) time: 0.1534 data: 0.0646 max mem: 9377 +Train: [57] [5200/6250] eta: 0:02:47 lr: 0.000052 grad: 0.1808 (0.1935) loss: 0.6768 (0.6765) time: 0.1355 data: 0.0543 max mem: 9377 +Train: [57] [5300/6250] eta: 0:02:31 lr: 0.000052 grad: 0.1833 (0.1932) loss: 0.6656 (0.6763) time: 0.1463 data: 0.0576 max mem: 9377 +Train: [57] [5400/6250] eta: 0:02:15 lr: 0.000051 grad: 0.1788 (0.1930) loss: 0.6778 (0.6763) time: 0.1440 data: 0.0604 max mem: 9377 +Train: [57] [5500/6250] eta: 0:01:59 lr: 0.000051 grad: 0.1844 (0.1929) loss: 0.6664 (0.6762) time: 0.1656 data: 0.0733 max mem: 9377 +Train: [57] [5600/6250] eta: 0:01:43 lr: 0.000051 grad: 0.1853 (0.1927) loss: 0.6690 (0.6760) time: 0.1547 data: 0.0670 max mem: 9377 +Train: [57] [5700/6250] eta: 0:01:27 lr: 0.000051 grad: 0.1830 (0.1925) loss: 0.6582 (0.6760) time: 0.1349 data: 0.0405 max mem: 9377 +Train: [57] [5800/6250] eta: 0:01:11 lr: 0.000051 grad: 0.1747 (0.1924) loss: 0.6838 (0.6760) time: 0.1658 data: 0.0732 max mem: 9377 +Train: [57] [5900/6250] eta: 0:00:55 lr: 0.000051 grad: 0.1780 (0.1922) loss: 0.6703 (0.6761) time: 0.1408 data: 0.0424 max mem: 9377 +Train: [57] [6000/6250] eta: 0:00:39 lr: 0.000051 grad: 0.1716 (0.1921) loss: 0.6696 (0.6760) time: 0.1651 data: 0.0794 max mem: 9377 +Train: [57] [6100/6250] eta: 0:00:23 lr: 0.000051 grad: 0.1755 (0.1919) loss: 0.6759 (0.6759) time: 0.1473 data: 0.0593 max mem: 9377 +Train: [57] [6200/6250] eta: 0:00:07 lr: 0.000051 grad: 0.1743 (0.1917) loss: 0.6831 (0.6759) time: 0.2281 data: 0.0794 max mem: 9377 +Train: [57] [6249/6250] eta: 0:00:00 lr: 0.000051 grad: 0.1810 (0.1916) loss: 0.6879 (0.6759) time: 0.1458 data: 0.0566 max mem: 9377 +Train: [57] Total time: 0:16:41 (0.1603 s / it) +Averaged stats: lr: 0.000051 grad: 0.1810 (0.1916) loss: 0.6879 (0.6759) +Eval (hcp-train-subset): [57] [ 0/62] eta: 0:05:57 loss: 0.8803 (0.8803) time: 5.7683 data: 5.7385 max mem: 9377 +Eval (hcp-train-subset): [57] [61/62] eta: 0:00:00 loss: 0.8979 (0.8967) time: 0.1309 data: 0.1060 max mem: 9377 +Eval (hcp-train-subset): [57] Total time: 0:00:14 (0.2342 s / it) +Averaged stats (hcp-train-subset): loss: 0.8979 (0.8967) +Eval (hcp-val): [57] [ 0/62] eta: 0:05:52 loss: 0.8920 (0.8920) time: 5.6797 data: 5.6498 max mem: 9377 +Eval (hcp-val): [57] [61/62] eta: 0:00:00 loss: 0.8956 (0.8981) time: 0.1424 data: 0.1111 max mem: 9377 +Eval (hcp-val): [57] Total time: 0:00:14 (0.2354 s / it) +Averaged stats (hcp-val): loss: 0.8956 (0.8981) +Eval (nsd-val): [57] [ 0/62] eta: 0:05:58 loss: 0.8815 (0.8815) time: 5.7849 data: 5.7535 max mem: 9377 +Eval (nsd-val): [57] [61/62] eta: 0:00:00 loss: 0.8860 (0.8861) time: 0.1385 data: 0.1125 max mem: 9377 +Eval (nsd-val): [57] Total time: 0:00:14 (0.2319 s / it) +Averaged stats (nsd-val): loss: 0.8860 (0.8861) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [58] [ 0/6250] eta: 11:20:39 lr: 0.000051 grad: 0.2917 (0.2917) loss: 0.6136 (0.6136) time: 6.5343 data: 6.4285 max mem: 9377 +Train: [58] [ 100/6250] eta: 0:23:27 lr: 0.000051 grad: 0.3325 (0.3411) loss: 0.6938 (0.7097) time: 0.1750 data: 0.0783 max mem: 9377 +Train: [58] [ 200/6250] eta: 0:20:23 lr: 0.000051 grad: 0.2785 (0.3285) loss: 0.6859 (0.7056) time: 0.1789 data: 0.0750 max mem: 9377 +Train: [58] [ 300/6250] eta: 0:18:55 lr: 0.000051 grad: 0.2878 (0.3258) loss: 0.6721 (0.7006) time: 0.1705 data: 0.0653 max mem: 9377 +Train: [58] [ 400/6250] eta: 0:18:03 lr: 0.000051 grad: 0.2319 (0.3069) loss: 0.6700 (0.6959) time: 0.1779 data: 0.0793 max mem: 9377 +Train: [58] [ 500/6250] eta: 0:17:26 lr: 0.000051 grad: 0.2287 (0.2937) loss: 0.6754 (0.6924) time: 0.1719 data: 0.0679 max mem: 9377 +Train: [58] [ 600/6250] eta: 0:16:47 lr: 0.000051 grad: 0.2075 (0.2807) loss: 0.6634 (0.6900) time: 0.1706 data: 0.0792 max mem: 9377 +Train: [58] [ 700/6250] eta: 0:16:11 lr: 0.000051 grad: 0.1926 (0.2689) loss: 0.6810 (0.6878) time: 0.1505 data: 0.0497 max mem: 9377 +Train: [58] [ 800/6250] eta: 0:16:05 lr: 0.000051 grad: 0.1802 (0.2593) loss: 0.6860 (0.6867) time: 0.1994 data: 0.1151 max mem: 9377 +Train: [58] [ 900/6250] eta: 0:15:40 lr: 0.000051 grad: 0.1756 (0.2506) loss: 0.7016 (0.6866) time: 0.1615 data: 0.0684 max mem: 9377 +Train: [58] [1000/6250] eta: 0:15:16 lr: 0.000051 grad: 0.1788 (0.2436) loss: 0.6765 (0.6859) time: 0.1827 data: 0.1008 max mem: 9377 +Train: [58] [1100/6250] eta: 0:14:53 lr: 0.000051 grad: 0.1824 (0.2384) loss: 0.6899 (0.6857) time: 0.1629 data: 0.0651 max mem: 9377 +Train: [58] [1200/6250] eta: 0:14:30 lr: 0.000051 grad: 0.1797 (0.2338) loss: 0.6768 (0.6852) time: 0.1593 data: 0.0623 max mem: 9377 +Train: [58] [1300/6250] eta: 0:14:08 lr: 0.000051 grad: 0.1742 (0.2295) loss: 0.6955 (0.6853) time: 0.1658 data: 0.0774 max mem: 9377 +Train: [58] [1400/6250] eta: 0:13:44 lr: 0.000051 grad: 0.1785 (0.2261) loss: 0.6826 (0.6850) time: 0.1295 data: 0.0327 max mem: 9377 +Train: [58] [1500/6250] eta: 0:13:20 lr: 0.000051 grad: 0.1802 (0.2231) loss: 0.6887 (0.6847) time: 0.1599 data: 0.0622 max mem: 9377 +Train: [58] [1600/6250] eta: 0:12:58 lr: 0.000051 grad: 0.1763 (0.2203) loss: 0.6816 (0.6847) time: 0.1404 data: 0.0505 max mem: 9377 +Train: [58] [1700/6250] eta: 0:12:37 lr: 0.000051 grad: 0.1752 (0.2179) loss: 0.6800 (0.6847) time: 0.1490 data: 0.0535 max mem: 9377 +Train: [58] [1800/6250] eta: 0:12:19 lr: 0.000051 grad: 0.1788 (0.2159) loss: 0.6824 (0.6847) time: 0.1459 data: 0.0509 max mem: 9377 +Train: [58] [1900/6250] eta: 0:11:58 lr: 0.000051 grad: 0.1763 (0.2140) loss: 0.6740 (0.6846) time: 0.1438 data: 0.0483 max mem: 9377 +Train: [58] [2000/6250] eta: 0:11:40 lr: 0.000051 grad: 0.1744 (0.2122) loss: 0.6974 (0.6846) time: 0.1691 data: 0.0875 max mem: 9377 +Train: [58] [2100/6250] eta: 0:11:24 lr: 0.000051 grad: 0.1700 (0.2105) loss: 0.6841 (0.6847) time: 0.1692 data: 0.0851 max mem: 9377 +Train: [58] [2200/6250] eta: 0:11:10 lr: 0.000050 grad: 0.1817 (0.2090) loss: 0.6733 (0.6846) time: 0.1852 data: 0.0946 max mem: 9377 +Train: [58] [2300/6250] eta: 0:10:56 lr: 0.000050 grad: 0.1752 (0.2076) loss: 0.6873 (0.6846) time: 0.1772 data: 0.0934 max mem: 9377 +Train: [58] [2400/6250] eta: 0:10:41 lr: 0.000050 grad: 0.1802 (0.2065) loss: 0.6845 (0.6846) time: 0.1830 data: 0.0891 max mem: 9377 +Train: [58] [2500/6250] eta: 0:10:28 lr: 0.000050 grad: 0.1761 (0.2056) loss: 0.6817 (0.6846) time: 0.2017 data: 0.1088 max mem: 9377 +Train: [58] [2600/6250] eta: 0:10:13 lr: 0.000050 grad: 0.1800 (0.2047) loss: 0.6648 (0.6846) time: 0.1977 data: 0.1042 max mem: 9377 +Train: [58] [2700/6250] eta: 0:09:56 lr: 0.000050 grad: 0.1833 (0.2038) loss: 0.6687 (0.6846) time: 0.1796 data: 0.0862 max mem: 9377 +Train: [58] [2800/6250] eta: 0:09:40 lr: 0.000050 grad: 0.1832 (0.2031) loss: 0.6901 (0.6845) time: 0.1783 data: 0.0901 max mem: 9377 +Train: [58] [2900/6250] eta: 0:09:22 lr: 0.000050 grad: 0.1771 (0.2024) loss: 0.6846 (0.6846) time: 0.1542 data: 0.0662 max mem: 9377 +Train: [58] [3000/6250] eta: 0:09:05 lr: 0.000050 grad: 0.1816 (0.2017) loss: 0.6850 (0.6845) time: 0.1620 data: 0.0774 max mem: 9377 +Train: [58] [3100/6250] eta: 0:08:47 lr: 0.000050 grad: 0.1803 (0.2011) loss: 0.6813 (0.6844) time: 0.1555 data: 0.0654 max mem: 9377 +Train: [58] [3200/6250] eta: 0:08:30 lr: 0.000050 grad: 0.1752 (0.2006) loss: 0.6796 (0.6843) time: 0.1557 data: 0.0665 max mem: 9377 +Train: [58] [3300/6250] eta: 0:08:14 lr: 0.000050 grad: 0.1788 (0.2000) loss: 0.6832 (0.6843) time: 0.1618 data: 0.0785 max mem: 9377 +Train: [58] [3400/6250] eta: 0:07:57 lr: 0.000050 grad: 0.1780 (0.1995) loss: 0.6922 (0.6844) time: 0.1607 data: 0.0807 max mem: 9377 +Train: [58] [3500/6250] eta: 0:07:40 lr: 0.000050 grad: 0.1745 (0.1988) loss: 0.6864 (0.6847) time: 0.1223 data: 0.0366 max mem: 9377 +Train: [58] [3600/6250] eta: 0:07:24 lr: 0.000050 grad: 0.1817 (0.1983) loss: 0.6847 (0.6847) time: 0.1851 data: 0.1013 max mem: 9377 +Train: [58] [3700/6250] eta: 0:07:07 lr: 0.000050 grad: 0.1778 (0.1979) loss: 0.6766 (0.6848) time: 0.1547 data: 0.0697 max mem: 9377 +Train: [58] [3800/6250] eta: 0:06:50 lr: 0.000050 grad: 0.1822 (0.1976) loss: 0.6814 (0.6848) time: 0.1600 data: 0.0716 max mem: 9377 +Train: [58] [3900/6250] eta: 0:06:33 lr: 0.000050 grad: 0.1786 (0.1972) loss: 0.6836 (0.6847) time: 0.1595 data: 0.0760 max mem: 9377 +Train: [58] [4000/6250] eta: 0:06:16 lr: 0.000050 grad: 0.1760 (0.1968) loss: 0.6757 (0.6846) time: 0.1647 data: 0.0765 max mem: 9377 +Train: [58] [4100/6250] eta: 0:05:58 lr: 0.000050 grad: 0.1796 (0.1965) loss: 0.6856 (0.6846) time: 0.1457 data: 0.0646 max mem: 9377 +Train: [58] [4200/6250] eta: 0:05:42 lr: 0.000050 grad: 0.1785 (0.1962) loss: 0.6814 (0.6844) time: 0.1690 data: 0.0595 max mem: 9377 +Train: [58] [4300/6250] eta: 0:05:26 lr: 0.000050 grad: 0.1769 (0.1959) loss: 0.6695 (0.6841) time: 0.1590 data: 0.0610 max mem: 9377 +Train: [58] [4400/6250] eta: 0:05:09 lr: 0.000050 grad: 0.1819 (0.1956) loss: 0.6741 (0.6840) time: 0.1500 data: 0.0589 max mem: 9377 +Train: [58] [4500/6250] eta: 0:04:52 lr: 0.000050 grad: 0.1826 (0.1953) loss: 0.6626 (0.6838) time: 0.1564 data: 0.0523 max mem: 9377 +Train: [58] [4600/6250] eta: 0:04:34 lr: 0.000050 grad: 0.1803 (0.1950) loss: 0.6740 (0.6836) time: 0.1474 data: 0.0626 max mem: 9377 +Train: [58] [4700/6250] eta: 0:04:18 lr: 0.000050 grad: 0.1762 (0.1947) loss: 0.6692 (0.6835) time: 0.1516 data: 0.0590 max mem: 9377 +Train: [58] [4800/6250] eta: 0:04:00 lr: 0.000050 grad: 0.1813 (0.1945) loss: 0.6645 (0.6834) time: 0.1374 data: 0.0440 max mem: 9377 +Train: [58] [4900/6250] eta: 0:03:43 lr: 0.000050 grad: 0.1844 (0.1943) loss: 0.6624 (0.6832) time: 0.1524 data: 0.0659 max mem: 9377 +Train: [58] [5000/6250] eta: 0:03:27 lr: 0.000050 grad: 0.1782 (0.1940) loss: 0.6762 (0.6831) time: 0.1552 data: 0.0607 max mem: 9377 +Train: [58] [5100/6250] eta: 0:03:10 lr: 0.000050 grad: 0.1809 (0.1938) loss: 0.6788 (0.6829) time: 0.1239 data: 0.0365 max mem: 9377 +Train: [58] [5200/6250] eta: 0:02:53 lr: 0.000050 grad: 0.1843 (0.1936) loss: 0.6803 (0.6828) time: 0.1335 data: 0.0472 max mem: 9377 +Train: [58] [5300/6250] eta: 0:02:36 lr: 0.000049 grad: 0.1778 (0.1934) loss: 0.6848 (0.6827) time: 0.1707 data: 0.0684 max mem: 9377 +Train: [58] [5400/6250] eta: 0:02:20 lr: 0.000049 grad: 0.1834 (0.1932) loss: 0.6709 (0.6826) time: 0.1677 data: 0.0797 max mem: 9377 +Train: [58] [5500/6250] eta: 0:02:03 lr: 0.000049 grad: 0.1792 (0.1929) loss: 0.6671 (0.6825) time: 0.1486 data: 0.0619 max mem: 9377 +Train: [58] [5600/6250] eta: 0:01:47 lr: 0.000049 grad: 0.1752 (0.1928) loss: 0.6722 (0.6824) time: 0.1608 data: 0.0737 max mem: 9377 +Train: [58] [5700/6250] eta: 0:01:30 lr: 0.000049 grad: 0.1791 (0.1926) loss: 0.6793 (0.6824) time: 0.1629 data: 0.0770 max mem: 9377 +Train: [58] [5800/6250] eta: 0:01:14 lr: 0.000049 grad: 0.1807 (0.1924) loss: 0.6821 (0.6823) time: 0.1451 data: 0.0538 max mem: 9377 +Train: [58] [5900/6250] eta: 0:00:57 lr: 0.000049 grad: 0.1807 (0.1922) loss: 0.6687 (0.6823) time: 0.1406 data: 0.0510 max mem: 9377 +Train: [58] [6000/6250] eta: 0:00:41 lr: 0.000049 grad: 0.1833 (0.1921) loss: 0.6677 (0.6821) time: 0.1453 data: 0.0498 max mem: 9377 +Train: [58] [6100/6250] eta: 0:00:24 lr: 0.000049 grad: 0.1817 (0.1920) loss: 0.6799 (0.6821) time: 0.1679 data: 0.0867 max mem: 9377 +Train: [58] [6200/6250] eta: 0:00:08 lr: 0.000049 grad: 0.1909 (0.1919) loss: 0.6634 (0.6821) time: 0.1334 data: 0.0476 max mem: 9377 +Train: [58] [6249/6250] eta: 0:00:00 lr: 0.000049 grad: 0.1797 (0.1918) loss: 0.6845 (0.6821) time: 0.1435 data: 0.0503 max mem: 9377 +Train: [58] Total time: 0:17:09 (0.1648 s / it) +Averaged stats: lr: 0.000049 grad: 0.1797 (0.1918) loss: 0.6845 (0.6821) +Eval (hcp-train-subset): [58] [ 0/62] eta: 0:04:19 loss: 0.8812 (0.8812) time: 4.1917 data: 4.1206 max mem: 9377 +Eval (hcp-train-subset): [58] [61/62] eta: 0:00:00 loss: 0.8947 (0.8956) time: 0.1384 data: 0.1134 max mem: 9377 +Eval (hcp-train-subset): [58] Total time: 0:00:14 (0.2310 s / it) +Averaged stats (hcp-train-subset): loss: 0.8947 (0.8956) +Eval (hcp-val): [58] [ 0/62] eta: 0:03:46 loss: 0.9053 (0.9053) time: 3.6488 data: 3.5820 max mem: 9377 +Eval (hcp-val): [58] [61/62] eta: 0:00:00 loss: 0.8944 (0.8956) time: 0.1291 data: 0.1026 max mem: 9377 +Eval (hcp-val): [58] Total time: 0:00:14 (0.2387 s / it) +Averaged stats (hcp-val): loss: 0.8944 (0.8956) +Eval (nsd-val): [58] [ 0/62] eta: 0:05:08 loss: 0.8741 (0.8741) time: 4.9796 data: 4.9472 max mem: 9377 +Eval (nsd-val): [58] [61/62] eta: 0:00:00 loss: 0.8850 (0.8873) time: 0.1323 data: 0.1063 max mem: 9377 +Eval (nsd-val): [58] Total time: 0:00:14 (0.2337 s / it) +Averaged stats (nsd-val): loss: 0.8850 (0.8873) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [59] [ 0/6250] eta: 11:39:08 lr: 0.000049 grad: 0.2716 (0.2716) loss: 0.6483 (0.6483) time: 6.7117 data: 6.6140 max mem: 9377 +Train: [59] [ 100/6250] eta: 0:22:55 lr: 0.000049 grad: 0.3016 (0.3533) loss: 0.7006 (0.7099) time: 0.1783 data: 0.0670 max mem: 9377 +Train: [59] [ 200/6250] eta: 0:19:49 lr: 0.000049 grad: 0.2497 (0.3227) loss: 0.6801 (0.6997) time: 0.1631 data: 0.0561 max mem: 9377 +Train: [59] [ 300/6250] eta: 0:18:49 lr: 0.000049 grad: 0.2476 (0.3010) loss: 0.6882 (0.6966) time: 0.1737 data: 0.0752 max mem: 9377 +Train: [59] [ 400/6250] eta: 0:17:50 lr: 0.000049 grad: 0.2065 (0.2788) loss: 0.6945 (0.6944) time: 0.1424 data: 0.0355 max mem: 9377 +Train: [59] [ 500/6250] eta: 0:17:09 lr: 0.000049 grad: 0.1887 (0.2613) loss: 0.6925 (0.6936) time: 0.1455 data: 0.0470 max mem: 9377 +Train: [59] [ 600/6250] eta: 0:16:24 lr: 0.000049 grad: 0.1822 (0.2496) loss: 0.7011 (0.6929) time: 0.1393 data: 0.0257 max mem: 9377 +Train: [59] [ 700/6250] eta: 0:15:55 lr: 0.000049 grad: 0.1951 (0.2413) loss: 0.6811 (0.6929) time: 0.1503 data: 0.0523 max mem: 9377 +Train: [59] [ 800/6250] eta: 0:15:36 lr: 0.000049 grad: 0.1855 (0.2347) loss: 0.6806 (0.6921) time: 0.1654 data: 0.0674 max mem: 9377 +Train: [59] [ 900/6250] eta: 0:15:18 lr: 0.000049 grad: 0.1877 (0.2294) loss: 0.6733 (0.6904) time: 0.1733 data: 0.0784 max mem: 9377 +Train: [59] [1000/6250] eta: 0:14:54 lr: 0.000049 grad: 0.1763 (0.2247) loss: 0.6987 (0.6896) time: 0.1382 data: 0.0463 max mem: 9377 +Train: [59] [1100/6250] eta: 0:14:35 lr: 0.000049 grad: 0.1816 (0.2210) loss: 0.6875 (0.6885) time: 0.1701 data: 0.0741 max mem: 9377 +Train: [59] [1200/6250] eta: 0:14:14 lr: 0.000049 grad: 0.1875 (0.2186) loss: 0.6772 (0.6875) time: 0.1642 data: 0.0784 max mem: 9377 +Train: [59] [1300/6250] eta: 0:13:54 lr: 0.000049 grad: 0.1781 (0.2162) loss: 0.6816 (0.6867) time: 0.1512 data: 0.0571 max mem: 9377 +Train: [59] [1400/6250] eta: 0:13:32 lr: 0.000049 grad: 0.1794 (0.2139) loss: 0.6895 (0.6863) time: 0.1635 data: 0.0644 max mem: 9377 +Train: [59] [1500/6250] eta: 0:13:14 lr: 0.000049 grad: 0.1813 (0.2117) loss: 0.6819 (0.6857) time: 0.1764 data: 0.0785 max mem: 9377 +Train: [59] [1600/6250] eta: 0:12:57 lr: 0.000049 grad: 0.1820 (0.2098) loss: 0.6852 (0.6852) time: 0.1636 data: 0.0644 max mem: 9377 +Train: [59] [1700/6250] eta: 0:12:38 lr: 0.000049 grad: 0.1804 (0.2082) loss: 0.6631 (0.6846) time: 0.1512 data: 0.0572 max mem: 9377 +Train: [59] [1800/6250] eta: 0:12:18 lr: 0.000049 grad: 0.1792 (0.2066) loss: 0.6896 (0.6841) time: 0.1396 data: 0.0483 max mem: 9377 +Train: [59] [1900/6250] eta: 0:12:01 lr: 0.000049 grad: 0.1806 (0.2054) loss: 0.6824 (0.6838) time: 0.1761 data: 0.0894 max mem: 9377 +Train: [59] [2000/6250] eta: 0:11:41 lr: 0.000049 grad: 0.1899 (0.2045) loss: 0.6749 (0.6832) time: 0.1365 data: 0.0411 max mem: 9377 +Train: [59] [2100/6250] eta: 0:11:26 lr: 0.000048 grad: 0.1823 (0.2035) loss: 0.6854 (0.6829) time: 0.2032 data: 0.1130 max mem: 9377 +Train: [59] [2200/6250] eta: 0:11:09 lr: 0.000048 grad: 0.1872 (0.2027) loss: 0.6761 (0.6825) time: 0.1768 data: 0.0908 max mem: 9377 +Train: [59] [2300/6250] eta: 0:10:51 lr: 0.000048 grad: 0.1808 (0.2022) loss: 0.6769 (0.6821) time: 0.1469 data: 0.0597 max mem: 9377 +Train: [59] [2400/6250] eta: 0:10:33 lr: 0.000048 grad: 0.1805 (0.2015) loss: 0.6764 (0.6818) time: 0.1740 data: 0.0749 max mem: 9377 +Train: [59] [2500/6250] eta: 0:10:16 lr: 0.000048 grad: 0.1783 (0.2009) loss: 0.6705 (0.6815) time: 0.1631 data: 0.0736 max mem: 9377 +Train: [59] [2600/6250] eta: 0:09:59 lr: 0.000048 grad: 0.1785 (0.2002) loss: 0.6700 (0.6812) time: 0.1480 data: 0.0555 max mem: 9377 +Train: [59] [2700/6250] eta: 0:09:42 lr: 0.000048 grad: 0.1840 (0.1995) loss: 0.6707 (0.6810) time: 0.1583 data: 0.0632 max mem: 9377 +Train: [59] [2800/6250] eta: 0:09:23 lr: 0.000048 grad: 0.1791 (0.1988) loss: 0.6662 (0.6810) time: 0.1452 data: 0.0493 max mem: 9377 +Train: [59] [2900/6250] eta: 0:09:06 lr: 0.000048 grad: 0.1814 (0.1982) loss: 0.6711 (0.6809) time: 0.1556 data: 0.0496 max mem: 9377 +Train: [59] [3000/6250] eta: 0:08:47 lr: 0.000048 grad: 0.1902 (0.1978) loss: 0.6777 (0.6808) time: 0.1403 data: 0.0417 max mem: 9377 +Train: [59] [3100/6250] eta: 0:08:30 lr: 0.000048 grad: 0.1792 (0.1974) loss: 0.6648 (0.6806) time: 0.1352 data: 0.0481 max mem: 9377 +Train: [59] [3200/6250] eta: 0:08:12 lr: 0.000048 grad: 0.1751 (0.1970) loss: 0.6875 (0.6805) time: 0.1576 data: 0.0716 max mem: 9377 +Train: [59] [3300/6250] eta: 0:07:55 lr: 0.000048 grad: 0.1797 (0.1965) loss: 0.6732 (0.6805) time: 0.1365 data: 0.0492 max mem: 9377 +Train: [59] [3400/6250] eta: 0:07:38 lr: 0.000048 grad: 0.1784 (0.1962) loss: 0.6834 (0.6805) time: 0.1699 data: 0.0872 max mem: 9377 +Train: [59] [3500/6250] eta: 0:07:21 lr: 0.000048 grad: 0.1746 (0.1958) loss: 0.6862 (0.6804) time: 0.1399 data: 0.0498 max mem: 9377 +Train: [59] [3600/6250] eta: 0:07:04 lr: 0.000048 grad: 0.1860 (0.1955) loss: 0.6775 (0.6804) time: 0.1666 data: 0.0843 max mem: 9377 +Train: [59] [3700/6250] eta: 0:06:49 lr: 0.000048 grad: 0.1813 (0.1952) loss: 0.6703 (0.6804) time: 0.2174 data: 0.1361 max mem: 9377 +Train: [59] [3800/6250] eta: 0:06:35 lr: 0.000048 grad: 0.1830 (0.1950) loss: 0.6858 (0.6803) time: 0.1623 data: 0.0845 max mem: 9377 +Train: [59] [3900/6250] eta: 0:06:20 lr: 0.000048 grad: 0.1753 (0.1947) loss: 0.6716 (0.6801) time: 0.1826 data: 0.1005 max mem: 9377 +Train: [59] [4000/6250] eta: 0:06:05 lr: 0.000048 grad: 0.1842 (0.1944) loss: 0.6772 (0.6800) time: 0.1686 data: 0.0813 max mem: 9377 +Train: [59] [4100/6250] eta: 0:05:50 lr: 0.000048 grad: 0.1811 (0.1941) loss: 0.6752 (0.6799) time: 0.2101 data: 0.1198 max mem: 9377 +Train: [59] [4200/6250] eta: 0:05:35 lr: 0.000048 grad: 0.1852 (0.1939) loss: 0.6721 (0.6799) time: 0.1578 data: 0.0671 max mem: 9377 +Train: [59] [4300/6250] eta: 0:05:19 lr: 0.000048 grad: 0.1808 (0.1937) loss: 0.6796 (0.6797) time: 0.1758 data: 0.0865 max mem: 9377 +Train: [59] [4400/6250] eta: 0:05:03 lr: 0.000048 grad: 0.1828 (0.1935) loss: 0.6784 (0.6795) time: 0.1591 data: 0.0603 max mem: 9377 +Train: [59] [4500/6250] eta: 0:04:47 lr: 0.000048 grad: 0.1820 (0.1932) loss: 0.6769 (0.6794) time: 0.1717 data: 0.0781 max mem: 9377 +Train: [59] [4600/6250] eta: 0:04:30 lr: 0.000048 grad: 0.1803 (0.1930) loss: 0.6758 (0.6794) time: 0.1722 data: 0.0862 max mem: 9377 +Train: [59] [4700/6250] eta: 0:04:13 lr: 0.000048 grad: 0.1782 (0.1927) loss: 0.6817 (0.6794) time: 0.1366 data: 0.0453 max mem: 9377 +Train: [59] [4800/6250] eta: 0:03:57 lr: 0.000048 grad: 0.1830 (0.1925) loss: 0.6783 (0.6796) time: 0.1749 data: 0.0802 max mem: 9377 +Train: [59] [4900/6250] eta: 0:03:40 lr: 0.000048 grad: 0.1741 (0.1922) loss: 0.6813 (0.6796) time: 0.1409 data: 0.0448 max mem: 9377 +Train: [59] [5000/6250] eta: 0:03:24 lr: 0.000048 grad: 0.1713 (0.1920) loss: 0.6667 (0.6795) time: 0.1511 data: 0.0566 max mem: 9377 +Train: [59] [5100/6250] eta: 0:03:07 lr: 0.000048 grad: 0.1791 (0.1919) loss: 0.6822 (0.6794) time: 0.1537 data: 0.0722 max mem: 9377 +Train: [59] [5200/6250] eta: 0:02:51 lr: 0.000047 grad: 0.1913 (0.1918) loss: 0.6566 (0.6792) time: 0.1927 data: 0.1061 max mem: 9377 +Train: [59] [5300/6250] eta: 0:02:34 lr: 0.000047 grad: 0.1798 (0.1916) loss: 0.6748 (0.6791) time: 0.1741 data: 0.0931 max mem: 9377 +Train: [59] [5400/6250] eta: 0:02:18 lr: 0.000047 grad: 0.1790 (0.1915) loss: 0.6744 (0.6790) time: 0.1724 data: 0.0784 max mem: 9377 +Train: [59] [5500/6250] eta: 0:02:02 lr: 0.000047 grad: 0.1781 (0.1913) loss: 0.6754 (0.6789) time: 0.1471 data: 0.0558 max mem: 9377 +Train: [59] [5600/6250] eta: 0:01:46 lr: 0.000047 grad: 0.1787 (0.1912) loss: 0.6849 (0.6789) time: 0.2000 data: 0.1164 max mem: 9377 +Train: [59] [5700/6250] eta: 0:01:29 lr: 0.000047 grad: 0.1804 (0.1910) loss: 0.6767 (0.6788) time: 0.1469 data: 0.0543 max mem: 9377 +Train: [59] [5800/6250] eta: 0:01:13 lr: 0.000047 grad: 0.1725 (0.1909) loss: 0.6704 (0.6787) time: 0.1627 data: 0.0656 max mem: 9377 +Train: [59] [5900/6250] eta: 0:00:56 lr: 0.000047 grad: 0.1761 (0.1908) loss: 0.6686 (0.6786) time: 0.1481 data: 0.0569 max mem: 9377 +Train: [59] [6000/6250] eta: 0:00:40 lr: 0.000047 grad: 0.1809 (0.1907) loss: 0.6599 (0.6784) time: 0.1618 data: 0.0627 max mem: 9377 +Train: [59] [6100/6250] eta: 0:00:24 lr: 0.000047 grad: 0.1781 (0.1907) loss: 0.6628 (0.6783) time: 0.1578 data: 0.0629 max mem: 9377 +Train: [59] [6200/6250] eta: 0:00:08 lr: 0.000047 grad: 0.1845 (0.1906) loss: 0.6534 (0.6781) time: 0.1504 data: 0.0576 max mem: 9377 +Train: [59] [6249/6250] eta: 0:00:00 lr: 0.000047 grad: 0.1877 (0.1906) loss: 0.6738 (0.6781) time: 0.1796 data: 0.0907 max mem: 9377 +Train: [59] Total time: 0:17:00 (0.1633 s / it) +Averaged stats: lr: 0.000047 grad: 0.1877 (0.1906) loss: 0.6738 (0.6781) +Eval (hcp-train-subset): [59] [ 0/62] eta: 0:03:41 loss: 0.8883 (0.8883) time: 3.5765 data: 3.4685 max mem: 9377 +Eval (hcp-train-subset): [59] [61/62] eta: 0:00:00 loss: 0.8978 (0.8991) time: 0.1459 data: 0.1186 max mem: 9377 +Eval (hcp-train-subset): [59] Total time: 0:00:14 (0.2299 s / it) +Averaged stats (hcp-train-subset): loss: 0.8978 (0.8991) +Making plots (hcp-train-subset): example=57 +Eval (hcp-val): [59] [ 0/62] eta: 0:05:50 loss: 0.8989 (0.8989) time: 5.6520 data: 5.6216 max mem: 9377 +Eval (hcp-val): [59] [61/62] eta: 0:00:00 loss: 0.8998 (0.9005) time: 0.1499 data: 0.1247 max mem: 9377 +Eval (hcp-val): [59] Total time: 0:00:15 (0.2427 s / it) +Averaged stats (hcp-val): loss: 0.8998 (0.9005) +Making plots (hcp-val): example=2 +Eval (nsd-val): [59] [ 0/62] eta: 0:04:21 loss: 0.8900 (0.8900) time: 4.2239 data: 4.1267 max mem: 9377 +Eval (nsd-val): [59] [61/62] eta: 0:00:00 loss: 0.8939 (0.8953) time: 0.1421 data: 0.1170 max mem: 9377 +Eval (nsd-val): [59] Total time: 0:00:14 (0.2388 s / it) +Averaged stats (nsd-val): loss: 0.8939 (0.8953) +Making plots (nsd-val): example=35 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-00059.pth +Train: [60] [ 0/6250] eta: 11:11:38 lr: 0.000047 grad: 0.2171 (0.2171) loss: 0.7541 (0.7541) time: 6.4477 data: 6.3158 max mem: 9377 +Train: [60] [ 100/6250] eta: 0:22:39 lr: 0.000047 grad: 0.3208 (0.3416) loss: 0.7125 (0.7203) time: 0.1476 data: 0.0354 max mem: 9377 +Train: [60] [ 200/6250] eta: 0:19:23 lr: 0.000047 grad: 0.3295 (0.3325) loss: 0.6992 (0.7126) time: 0.1636 data: 0.0533 max mem: 9377 +Train: [60] [ 300/6250] eta: 0:18:10 lr: 0.000047 grad: 0.2785 (0.3197) loss: 0.6620 (0.7026) time: 0.1543 data: 0.0573 max mem: 9377 +Train: [60] [ 400/6250] eta: 0:17:15 lr: 0.000047 grad: 0.2587 (0.3064) loss: 0.6761 (0.6969) time: 0.1775 data: 0.0799 max mem: 9377 +Train: [60] [ 500/6250] eta: 0:16:25 lr: 0.000047 grad: 0.2065 (0.2894) loss: 0.6872 (0.6935) time: 0.1412 data: 0.0417 max mem: 9377 +Train: [60] [ 600/6250] eta: 0:15:51 lr: 0.000047 grad: 0.2083 (0.2762) loss: 0.6609 (0.6899) time: 0.1723 data: 0.0731 max mem: 9377 +Train: [60] [ 700/6250] eta: 0:15:54 lr: 0.000047 grad: 0.2148 (0.2684) loss: 0.6676 (0.6867) time: 0.2202 data: 0.1297 max mem: 9377 +Train: [60] [ 800/6250] eta: 0:15:45 lr: 0.000047 grad: 0.1943 (0.2608) loss: 0.6663 (0.6842) time: 0.1756 data: 0.0857 max mem: 9377 +Train: [60] [ 900/6250] eta: 0:15:34 lr: 0.000047 grad: 0.1925 (0.2540) loss: 0.6726 (0.6826) time: 0.1434 data: 0.0440 max mem: 9377 +Train: [60] [1000/6250] eta: 0:15:10 lr: 0.000047 grad: 0.1926 (0.2491) loss: 0.6580 (0.6806) time: 0.1443 data: 0.0600 max mem: 9377 +Train: [60] [1100/6250] eta: 0:14:58 lr: 0.000047 grad: 0.1812 (0.2441) loss: 0.6720 (0.6794) time: 0.1853 data: 0.0958 max mem: 9377 +Train: [60] [1200/6250] eta: 0:14:42 lr: 0.000047 grad: 0.1867 (0.2396) loss: 0.6821 (0.6784) time: 0.1696 data: 0.0763 max mem: 9377 +Train: [60] [1300/6250] eta: 0:14:22 lr: 0.000047 grad: 0.1780 (0.2357) loss: 0.6792 (0.6782) time: 0.2003 data: 0.1088 max mem: 9377 +Train: [60] [1400/6250] eta: 0:13:57 lr: 0.000047 grad: 0.1787 (0.2319) loss: 0.6733 (0.6779) time: 0.1692 data: 0.0759 max mem: 9377 +Train: [60] [1500/6250] eta: 0:13:36 lr: 0.000047 grad: 0.1762 (0.2286) loss: 0.6775 (0.6777) time: 0.1801 data: 0.0997 max mem: 9377 +Train: [60] [1600/6250] eta: 0:13:16 lr: 0.000047 grad: 0.1776 (0.2257) loss: 0.6703 (0.6773) time: 0.1547 data: 0.0772 max mem: 9377 +Train: [60] [1700/6250] eta: 0:12:57 lr: 0.000047 grad: 0.1751 (0.2233) loss: 0.6730 (0.6772) time: 0.1490 data: 0.0555 max mem: 9377 +Train: [60] [1800/6250] eta: 0:12:38 lr: 0.000047 grad: 0.1845 (0.2213) loss: 0.6811 (0.6771) time: 0.1513 data: 0.0656 max mem: 9377 +Train: [60] [1900/6250] eta: 0:12:19 lr: 0.000047 grad: 0.1779 (0.2196) loss: 0.6805 (0.6768) time: 0.1561 data: 0.0695 max mem: 9377 +Train: [60] [2000/6250] eta: 0:12:02 lr: 0.000047 grad: 0.1787 (0.2179) loss: 0.6756 (0.6767) time: 0.1627 data: 0.0823 max mem: 9377 +Train: [60] [2100/6250] eta: 0:11:43 lr: 0.000046 grad: 0.1774 (0.2162) loss: 0.6809 (0.6766) time: 0.1618 data: 0.0752 max mem: 9377 +Train: [60] [2200/6250] eta: 0:11:26 lr: 0.000046 grad: 0.1849 (0.2148) loss: 0.6777 (0.6765) time: 0.1807 data: 0.0984 max mem: 9377 +Train: [60] [2300/6250] eta: 0:11:07 lr: 0.000046 grad: 0.1790 (0.2136) loss: 0.6735 (0.6765) time: 0.1626 data: 0.0802 max mem: 9377 +Train: [60] [2400/6250] eta: 0:10:47 lr: 0.000046 grad: 0.1796 (0.2124) loss: 0.6852 (0.6764) time: 0.1678 data: 0.0852 max mem: 9377 +Train: [60] [2500/6250] eta: 0:10:29 lr: 0.000046 grad: 0.1813 (0.2114) loss: 0.6655 (0.6764) time: 0.1434 data: 0.0654 max mem: 9377 +Train: [60] [2600/6250] eta: 0:10:11 lr: 0.000046 grad: 0.1895 (0.2104) loss: 0.6695 (0.6764) time: 0.1498 data: 0.0639 max mem: 9377 +Train: [60] [2700/6250] eta: 0:09:51 lr: 0.000046 grad: 0.1761 (0.2095) loss: 0.6817 (0.6762) time: 0.1418 data: 0.0519 max mem: 9377 +Train: [60] [2800/6250] eta: 0:09:32 lr: 0.000046 grad: 0.1786 (0.2086) loss: 0.6740 (0.6763) time: 0.1348 data: 0.0378 max mem: 9377 +Train: [60] [2900/6250] eta: 0:09:15 lr: 0.000046 grad: 0.1803 (0.2079) loss: 0.6656 (0.6763) time: 0.1709 data: 0.0707 max mem: 9377 +Train: [60] [3000/6250] eta: 0:08:57 lr: 0.000046 grad: 0.1772 (0.2071) loss: 0.6787 (0.6763) time: 0.1620 data: 0.0640 max mem: 9377 +Train: [60] [3100/6250] eta: 0:08:40 lr: 0.000046 grad: 0.1857 (0.2065) loss: 0.6831 (0.6762) time: 0.1421 data: 0.0496 max mem: 9377 +Train: [60] [3200/6250] eta: 0:08:23 lr: 0.000046 grad: 0.1890 (0.2058) loss: 0.6575 (0.6761) time: 0.1564 data: 0.0774 max mem: 9377 +Train: [60] [3300/6250] eta: 0:08:06 lr: 0.000046 grad: 0.1770 (0.2051) loss: 0.6787 (0.6761) time: 0.1753 data: 0.0902 max mem: 9377 +Train: [60] [3400/6250] eta: 0:07:49 lr: 0.000046 grad: 0.1796 (0.2045) loss: 0.6748 (0.6761) time: 0.1547 data: 0.0671 max mem: 9377 +Train: [60] [3500/6250] eta: 0:07:33 lr: 0.000046 grad: 0.1821 (0.2039) loss: 0.6753 (0.6761) time: 0.1774 data: 0.0926 max mem: 9377 +Train: [60] [3600/6250] eta: 0:07:16 lr: 0.000046 grad: 0.1867 (0.2034) loss: 0.6861 (0.6761) time: 0.1745 data: 0.0894 max mem: 9377 +Train: [60] [3700/6250] eta: 0:07:00 lr: 0.000046 grad: 0.1802 (0.2028) loss: 0.6726 (0.6761) time: 0.1616 data: 0.0710 max mem: 9377 +Train: [60] [3800/6250] eta: 0:06:44 lr: 0.000046 grad: 0.1792 (0.2023) loss: 0.6770 (0.6761) time: 0.2050 data: 0.1115 max mem: 9377 +Train: [60] [3900/6250] eta: 0:06:27 lr: 0.000046 grad: 0.1793 (0.2019) loss: 0.6745 (0.6762) time: 0.1673 data: 0.0842 max mem: 9377 +Train: [60] [4000/6250] eta: 0:06:10 lr: 0.000046 grad: 0.1824 (0.2014) loss: 0.6760 (0.6763) time: 0.1406 data: 0.0555 max mem: 9377 +Train: [60] [4100/6250] eta: 0:05:54 lr: 0.000046 grad: 0.1753 (0.2010) loss: 0.6804 (0.6764) time: 0.1794 data: 0.0857 max mem: 9377 +Train: [60] [4200/6250] eta: 0:05:38 lr: 0.000046 grad: 0.1875 (0.2005) loss: 0.6550 (0.6764) time: 0.1962 data: 0.1069 max mem: 9377 +Train: [60] [4300/6250] eta: 0:05:21 lr: 0.000046 grad: 0.1769 (0.2001) loss: 0.6839 (0.6765) time: 0.1676 data: 0.0755 max mem: 9377 +Train: [60] [4400/6250] eta: 0:05:05 lr: 0.000046 grad: 0.1878 (0.1999) loss: 0.6741 (0.6764) time: 0.1558 data: 0.0643 max mem: 9377 +Train: [60] [4500/6250] eta: 0:04:48 lr: 0.000046 grad: 0.1743 (0.1995) loss: 0.6890 (0.6765) time: 0.1583 data: 0.0612 max mem: 9377 +Train: [60] [4600/6250] eta: 0:04:32 lr: 0.000046 grad: 0.1726 (0.1991) loss: 0.6869 (0.6764) time: 0.1737 data: 0.0822 max mem: 9377 +Train: [60] [4700/6250] eta: 0:04:15 lr: 0.000046 grad: 0.1777 (0.1988) loss: 0.6799 (0.6764) time: 0.1490 data: 0.0548 max mem: 9377 +Train: [60] [4800/6250] eta: 0:03:58 lr: 0.000046 grad: 0.1862 (0.1985) loss: 0.6665 (0.6763) time: 0.1447 data: 0.0598 max mem: 9377 +Train: [60] [4900/6250] eta: 0:03:41 lr: 0.000046 grad: 0.1784 (0.1982) loss: 0.6793 (0.6763) time: 0.1519 data: 0.0536 max mem: 9377 +Train: [60] [5000/6250] eta: 0:03:25 lr: 0.000046 grad: 0.1840 (0.1979) loss: 0.6678 (0.6762) time: 0.1828 data: 0.1002 max mem: 9377 +Train: [60] [5100/6250] eta: 0:03:08 lr: 0.000046 grad: 0.1810 (0.1978) loss: 0.6810 (0.6760) time: 0.1491 data: 0.0569 max mem: 9377 +Train: [60] [5200/6250] eta: 0:02:52 lr: 0.000045 grad: 0.1829 (0.1975) loss: 0.6571 (0.6757) time: 0.1474 data: 0.0586 max mem: 9377 +Train: [60] [5300/6250] eta: 0:02:35 lr: 0.000045 grad: 0.1825 (0.1974) loss: 0.6699 (0.6756) time: 0.1616 data: 0.0764 max mem: 9377 +Train: [60] [5400/6250] eta: 0:02:19 lr: 0.000045 grad: 0.1838 (0.1972) loss: 0.6783 (0.6755) time: 0.1714 data: 0.0780 max mem: 9377 +Train: [60] [5500/6250] eta: 0:02:02 lr: 0.000045 grad: 0.1823 (0.1969) loss: 0.6695 (0.6755) time: 0.1758 data: 0.0877 max mem: 9377 +Train: [60] [5600/6250] eta: 0:01:46 lr: 0.000045 grad: 0.1760 (0.1967) loss: 0.6767 (0.6755) time: 0.1562 data: 0.0710 max mem: 9377 +Train: [60] [5700/6250] eta: 0:01:29 lr: 0.000045 grad: 0.1818 (0.1966) loss: 0.6570 (0.6755) time: 0.1442 data: 0.0559 max mem: 9377 +Train: [60] [5800/6250] eta: 0:01:13 lr: 0.000045 grad: 0.1830 (0.1964) loss: 0.6752 (0.6754) time: 0.1515 data: 0.0572 max mem: 9377 +Train: [60] [5900/6250] eta: 0:00:57 lr: 0.000045 grad: 0.1846 (0.1962) loss: 0.6661 (0.6754) time: 0.1537 data: 0.0682 max mem: 9377 +Train: [60] [6000/6250] eta: 0:00:40 lr: 0.000045 grad: 0.1865 (0.1960) loss: 0.6681 (0.6754) time: 0.1571 data: 0.0734 max mem: 9377 +Train: [60] [6100/6250] eta: 0:00:24 lr: 0.000045 grad: 0.1786 (0.1959) loss: 0.6653 (0.6753) time: 0.1495 data: 0.0697 max mem: 9377 +Train: [60] [6200/6250] eta: 0:00:08 lr: 0.000045 grad: 0.1804 (0.1957) loss: 0.6823 (0.6754) time: 0.1832 data: 0.0913 max mem: 9377 +Train: [60] [6249/6250] eta: 0:00:00 lr: 0.000045 grad: 0.1833 (0.1956) loss: 0.6752 (0.6754) time: 0.1539 data: 0.0611 max mem: 9377 +Train: [60] Total time: 0:17:02 (0.1636 s / it) +Averaged stats: lr: 0.000045 grad: 0.1833 (0.1956) loss: 0.6752 (0.6754) +Eval (hcp-train-subset): [60] [ 0/62] eta: 0:04:06 loss: 0.8861 (0.8861) time: 3.9808 data: 3.8736 max mem: 9377 +Eval (hcp-train-subset): [60] [61/62] eta: 0:00:00 loss: 0.8996 (0.9010) time: 0.1193 data: 0.0924 max mem: 9377 +Eval (hcp-train-subset): [60] Total time: 0:00:14 (0.2384 s / it) +Averaged stats (hcp-train-subset): loss: 0.8996 (0.9010) +Eval (hcp-val): [60] [ 0/62] eta: 0:06:05 loss: 0.9028 (0.9028) time: 5.8995 data: 5.8676 max mem: 9377 +Eval (hcp-val): [60] [61/62] eta: 0:00:00 loss: 0.9022 (0.9022) time: 0.1571 data: 0.1321 max mem: 9377 +Eval (hcp-val): [60] Total time: 0:00:14 (0.2403 s / it) +Averaged stats (hcp-val): loss: 0.9022 (0.9022) +Eval (nsd-val): [60] [ 0/62] eta: 0:05:07 loss: 0.8869 (0.8869) time: 4.9652 data: 4.9280 max mem: 9377 +Eval (nsd-val): [60] [61/62] eta: 0:00:00 loss: 0.8879 (0.8925) time: 0.1600 data: 0.1337 max mem: 9377 +Eval (nsd-val): [60] Total time: 0:00:14 (0.2414 s / it) +Averaged stats (nsd-val): loss: 0.8879 (0.8925) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [61] [ 0/6250] eta: 8:46:09 lr: 0.000045 grad: 0.2123 (0.2123) loss: 0.7978 (0.7978) time: 5.0512 data: 4.6474 max mem: 9377 +Train: [61] [ 100/6250] eta: 0:24:27 lr: 0.000045 grad: 0.2705 (0.2935) loss: 0.7180 (0.7286) time: 0.1965 data: 0.0899 max mem: 9377 +Train: [61] [ 200/6250] eta: 0:21:17 lr: 0.000045 grad: 0.3131 (0.2995) loss: 0.6393 (0.7021) time: 0.1810 data: 0.0708 max mem: 9377 +Train: [61] [ 300/6250] eta: 0:19:47 lr: 0.000045 grad: 0.2976 (0.3022) loss: 0.6568 (0.6882) time: 0.1838 data: 0.0779 max mem: 9377 +Train: [61] [ 400/6250] eta: 0:18:52 lr: 0.000045 grad: 0.2433 (0.2933) loss: 0.6745 (0.6839) time: 0.1604 data: 0.0629 max mem: 9377 +Train: [61] [ 500/6250] eta: 0:18:11 lr: 0.000045 grad: 0.2104 (0.2787) loss: 0.6794 (0.6814) time: 0.1682 data: 0.0627 max mem: 9377 +Train: [61] [ 600/6250] eta: 0:17:24 lr: 0.000045 grad: 0.2134 (0.2680) loss: 0.6744 (0.6805) time: 0.1610 data: 0.0660 max mem: 9377 +Train: [61] [ 700/6250] eta: 0:16:58 lr: 0.000045 grad: 0.2031 (0.2598) loss: 0.6763 (0.6796) time: 0.2012 data: 0.1031 max mem: 9377 +Train: [61] [ 800/6250] eta: 0:16:28 lr: 0.000045 grad: 0.1847 (0.2513) loss: 0.6793 (0.6795) time: 0.1468 data: 0.0504 max mem: 9377 +Train: [61] [ 900/6250] eta: 0:15:58 lr: 0.000045 grad: 0.1826 (0.2445) loss: 0.6822 (0.6783) time: 0.1557 data: 0.0659 max mem: 9377 +Train: [61] [1000/6250] eta: 0:15:30 lr: 0.000045 grad: 0.1844 (0.2401) loss: 0.6792 (0.6784) time: 0.1438 data: 0.0636 max mem: 9377 +Train: [61] [1100/6250] eta: 0:15:06 lr: 0.000045 grad: 0.1780 (0.2349) loss: 0.6755 (0.6783) time: 0.1554 data: 0.0646 max mem: 9377 +Train: [61] [1200/6250] eta: 0:14:43 lr: 0.000045 grad: 0.1813 (0.2312) loss: 0.6767 (0.6783) time: 0.1627 data: 0.0773 max mem: 9377 +Train: [61] [1300/6250] eta: 0:14:21 lr: 0.000045 grad: 0.1857 (0.2279) loss: 0.6806 (0.6783) time: 0.1721 data: 0.0843 max mem: 9377 +Train: [61] [1400/6250] eta: 0:13:58 lr: 0.000045 grad: 0.1886 (0.2255) loss: 0.6781 (0.6780) time: 0.1509 data: 0.0476 max mem: 9377 +Train: [61] [1500/6250] eta: 0:13:35 lr: 0.000045 grad: 0.1889 (0.2230) loss: 0.6768 (0.6780) time: 0.1643 data: 0.0733 max mem: 9377 +Train: [61] [1600/6250] eta: 0:13:14 lr: 0.000045 grad: 0.1872 (0.2211) loss: 0.6697 (0.6779) time: 0.1313 data: 0.0446 max mem: 9377 +Train: [61] [1700/6250] eta: 0:12:55 lr: 0.000045 grad: 0.1872 (0.2194) loss: 0.6632 (0.6774) time: 0.1883 data: 0.1054 max mem: 9377 +Train: [61] [1800/6250] eta: 0:12:31 lr: 0.000045 grad: 0.1775 (0.2174) loss: 0.6784 (0.6773) time: 0.1104 data: 0.0256 max mem: 9377 +Train: [61] [1900/6250] eta: 0:12:12 lr: 0.000045 grad: 0.1801 (0.2159) loss: 0.6727 (0.6771) time: 0.1493 data: 0.0649 max mem: 9377 +Train: [61] [2000/6250] eta: 0:11:55 lr: 0.000045 grad: 0.1875 (0.2143) loss: 0.6839 (0.6769) time: 0.1881 data: 0.1026 max mem: 9377 +Train: [61] [2100/6250] eta: 0:11:37 lr: 0.000044 grad: 0.1844 (0.2129) loss: 0.6746 (0.6766) time: 0.1693 data: 0.0829 max mem: 9377 +Train: [61] [2200/6250] eta: 0:11:19 lr: 0.000044 grad: 0.1853 (0.2118) loss: 0.6642 (0.6763) time: 0.1752 data: 0.0818 max mem: 9377 +Train: [61] [2300/6250] eta: 0:11:01 lr: 0.000044 grad: 0.1842 (0.2109) loss: 0.6675 (0.6761) time: 0.1609 data: 0.0776 max mem: 9377 +Train: [61] [2400/6250] eta: 0:10:43 lr: 0.000044 grad: 0.1852 (0.2099) loss: 0.6593 (0.6759) time: 0.1790 data: 0.0893 max mem: 9377 +Train: [61] [2500/6250] eta: 0:10:25 lr: 0.000044 grad: 0.1849 (0.2090) loss: 0.6738 (0.6758) time: 0.1525 data: 0.0669 max mem: 9377 +Train: [61] [2600/6250] eta: 0:10:07 lr: 0.000044 grad: 0.1835 (0.2082) loss: 0.6628 (0.6757) time: 0.1426 data: 0.0500 max mem: 9377 +Train: [61] [2700/6250] eta: 0:09:50 lr: 0.000044 grad: 0.1902 (0.2076) loss: 0.6746 (0.6755) time: 0.1485 data: 0.0564 max mem: 9377 +Train: [61] [2800/6250] eta: 0:09:33 lr: 0.000044 grad: 0.1887 (0.2070) loss: 0.6734 (0.6754) time: 0.1642 data: 0.0726 max mem: 9377 +Train: [61] [2900/6250] eta: 0:09:16 lr: 0.000044 grad: 0.1796 (0.2065) loss: 0.6701 (0.6752) time: 0.1887 data: 0.1006 max mem: 9377 +Train: [61] [3000/6250] eta: 0:08:58 lr: 0.000044 grad: 0.1814 (0.2058) loss: 0.6753 (0.6753) time: 0.1430 data: 0.0483 max mem: 9377 +Train: [61] [3100/6250] eta: 0:08:40 lr: 0.000044 grad: 0.1854 (0.2052) loss: 0.6600 (0.6753) time: 0.1758 data: 0.0884 max mem: 9377 +Train: [61] [3200/6250] eta: 0:08:22 lr: 0.000044 grad: 0.1814 (0.2046) loss: 0.6726 (0.6752) time: 0.1902 data: 0.1004 max mem: 9377 +Train: [61] [3300/6250] eta: 0:08:05 lr: 0.000044 grad: 0.1872 (0.2042) loss: 0.6756 (0.6752) time: 0.1515 data: 0.0674 max mem: 9377 +Train: [61] [3400/6250] eta: 0:07:47 lr: 0.000044 grad: 0.1851 (0.2038) loss: 0.6854 (0.6753) time: 0.1487 data: 0.0540 max mem: 9377 +Train: [61] [3500/6250] eta: 0:07:30 lr: 0.000044 grad: 0.1979 (0.2034) loss: 0.6602 (0.6753) time: 0.1445 data: 0.0531 max mem: 9377 +Train: [61] [3600/6250] eta: 0:07:12 lr: 0.000044 grad: 0.1861 (0.2031) loss: 0.6791 (0.6753) time: 0.1481 data: 0.0623 max mem: 9377 +Train: [61] [3700/6250] eta: 0:06:56 lr: 0.000044 grad: 0.1872 (0.2027) loss: 0.6662 (0.6752) time: 0.1579 data: 0.0734 max mem: 9377 +Train: [61] [3800/6250] eta: 0:06:39 lr: 0.000044 grad: 0.1772 (0.2024) loss: 0.6823 (0.6752) time: 0.1472 data: 0.0538 max mem: 9377 +Train: [61] [3900/6250] eta: 0:06:23 lr: 0.000044 grad: 0.1849 (0.2022) loss: 0.6716 (0.6752) time: 0.1855 data: 0.1050 max mem: 9377 +Train: [61] [4000/6250] eta: 0:06:07 lr: 0.000044 grad: 0.1909 (0.2020) loss: 0.6658 (0.6752) time: 0.1683 data: 0.0811 max mem: 9377 +Train: [61] [4100/6250] eta: 0:05:51 lr: 0.000044 grad: 0.1793 (0.2016) loss: 0.6892 (0.6754) time: 0.1730 data: 0.0859 max mem: 9377 +Train: [61] [4200/6250] eta: 0:05:34 lr: 0.000044 grad: 0.1832 (0.2013) loss: 0.7016 (0.6756) time: 0.1644 data: 0.0823 max mem: 9377 +Train: [61] [4300/6250] eta: 0:05:18 lr: 0.000044 grad: 0.1841 (0.2010) loss: 0.6767 (0.6757) time: 0.1671 data: 0.0729 max mem: 9377 +Train: [61] [4400/6250] eta: 0:05:02 lr: 0.000044 grad: 0.1845 (0.2007) loss: 0.6828 (0.6758) time: 0.1681 data: 0.0709 max mem: 9377 +Train: [61] [4500/6250] eta: 0:04:45 lr: 0.000044 grad: 0.1893 (0.2004) loss: 0.6638 (0.6757) time: 0.1596 data: 0.0559 max mem: 9377 +Train: [61] [4600/6250] eta: 0:04:28 lr: 0.000044 grad: 0.1893 (0.2004) loss: 0.6779 (0.6757) time: 0.1650 data: 0.0721 max mem: 9377 +Train: [61] [4700/6250] eta: 0:04:12 lr: 0.000044 grad: 0.1879 (0.2001) loss: 0.6754 (0.6758) time: 0.1498 data: 0.0524 max mem: 9377 +Train: [61] [4800/6250] eta: 0:03:55 lr: 0.000044 grad: 0.1918 (0.1999) loss: 0.6614 (0.6758) time: 0.1681 data: 0.0803 max mem: 9377 +Train: [61] [4900/6250] eta: 0:03:39 lr: 0.000044 grad: 0.1805 (0.1996) loss: 0.6766 (0.6758) time: 0.1479 data: 0.0625 max mem: 9377 +Train: [61] [5000/6250] eta: 0:03:22 lr: 0.000044 grad: 0.1848 (0.1993) loss: 0.6822 (0.6759) time: 0.1630 data: 0.0827 max mem: 9377 +Train: [61] [5100/6250] eta: 0:03:06 lr: 0.000044 grad: 0.1844 (0.1991) loss: 0.6770 (0.6759) time: 0.1658 data: 0.0655 max mem: 9377 +Train: [61] [5200/6250] eta: 0:02:49 lr: 0.000044 grad: 0.1841 (0.1989) loss: 0.6807 (0.6759) time: 0.1247 data: 0.0292 max mem: 9377 +Train: [61] [5300/6250] eta: 0:02:33 lr: 0.000043 grad: 0.1946 (0.1988) loss: 0.6769 (0.6760) time: 0.1448 data: 0.0596 max mem: 9377 +Train: [61] [5400/6250] eta: 0:02:17 lr: 0.000043 grad: 0.1822 (0.1987) loss: 0.6858 (0.6760) time: 0.1568 data: 0.0731 max mem: 9377 +Train: [61] [5500/6250] eta: 0:02:00 lr: 0.000043 grad: 0.1861 (0.1985) loss: 0.6606 (0.6760) time: 0.1600 data: 0.0712 max mem: 9377 +Train: [61] [5600/6250] eta: 0:01:44 lr: 0.000043 grad: 0.1822 (0.1984) loss: 0.6669 (0.6759) time: 0.1693 data: 0.0866 max mem: 9377 +Train: [61] [5700/6250] eta: 0:01:28 lr: 0.000043 grad: 0.1962 (0.1984) loss: 0.6763 (0.6758) time: 0.1474 data: 0.0494 max mem: 9377 +Train: [61] [5800/6250] eta: 0:01:12 lr: 0.000043 grad: 0.1851 (0.1983) loss: 0.6860 (0.6759) time: 0.1548 data: 0.0729 max mem: 9377 +Train: [61] [5900/6250] eta: 0:00:56 lr: 0.000043 grad: 0.1855 (0.1982) loss: 0.6728 (0.6759) time: 0.1694 data: 0.0824 max mem: 9377 +Train: [61] [6000/6250] eta: 0:00:40 lr: 0.000043 grad: 0.1890 (0.1981) loss: 0.6810 (0.6759) time: 0.2280 data: 0.1428 max mem: 9377 +Train: [61] [6100/6250] eta: 0:00:24 lr: 0.000043 grad: 0.1840 (0.1980) loss: 0.6790 (0.6759) time: 0.1321 data: 0.0352 max mem: 9377 +Train: [61] [6200/6250] eta: 0:00:08 lr: 0.000043 grad: 0.1799 (0.1979) loss: 0.6684 (0.6759) time: 0.1311 data: 0.0408 max mem: 9377 +Train: [61] [6249/6250] eta: 0:00:00 lr: 0.000043 grad: 0.1870 (0.1978) loss: 0.6738 (0.6759) time: 0.1612 data: 0.0817 max mem: 9377 +Train: [61] Total time: 0:16:51 (0.1619 s / it) +Averaged stats: lr: 0.000043 grad: 0.1870 (0.1978) loss: 0.6738 (0.6759) +Eval (hcp-train-subset): [61] [ 0/62] eta: 0:06:28 loss: 0.8876 (0.8876) time: 6.2713 data: 6.2392 max mem: 9377 +Eval (hcp-train-subset): [61] [61/62] eta: 0:00:00 loss: 0.8991 (0.8997) time: 0.1333 data: 0.1082 max mem: 9377 +Eval (hcp-train-subset): [61] Total time: 0:00:14 (0.2414 s / it) +Averaged stats (hcp-train-subset): loss: 0.8991 (0.8997) +Eval (hcp-val): [61] [ 0/62] eta: 0:06:16 loss: 0.9150 (0.9150) time: 6.0700 data: 6.0387 max mem: 9377 +Eval (hcp-val): [61] [61/62] eta: 0:00:00 loss: 0.8959 (0.9002) time: 0.1438 data: 0.1171 max mem: 9377 +Eval (hcp-val): [61] Total time: 0:00:14 (0.2383 s / it) +Averaged stats (hcp-val): loss: 0.8959 (0.9002) +Eval (nsd-val): [61] [ 0/62] eta: 0:03:40 loss: 0.8944 (0.8944) time: 3.5542 data: 3.4673 max mem: 9377 +Eval (nsd-val): [61] [61/62] eta: 0:00:00 loss: 0.8922 (0.8946) time: 0.1205 data: 0.0934 max mem: 9377 +Eval (nsd-val): [61] Total time: 0:00:14 (0.2307 s / it) +Averaged stats (nsd-val): loss: 0.8922 (0.8946) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [62] [ 0/6250] eta: 9:24:11 lr: 0.000043 grad: 0.1970 (0.1970) loss: 0.7817 (0.7817) time: 5.4162 data: 5.2356 max mem: 9377 +Train: [62] [ 100/6250] eta: 0:23:11 lr: 0.000043 grad: 0.3231 (0.3760) loss: 0.6781 (0.6953) time: 0.1676 data: 0.0640 max mem: 9377 +Train: [62] [ 200/6250] eta: 0:20:04 lr: 0.000043 grad: 0.3952 (0.3929) loss: 0.6720 (0.6866) time: 0.1462 data: 0.0406 max mem: 9377 +Train: [62] [ 300/6250] eta: 0:18:39 lr: 0.000043 grad: 0.2710 (0.3656) loss: 0.6718 (0.6811) time: 0.1768 data: 0.0918 max mem: 9377 +Train: [62] [ 400/6250] eta: 0:17:34 lr: 0.000043 grad: 0.2598 (0.3413) loss: 0.6696 (0.6779) time: 0.1418 data: 0.0397 max mem: 9377 +Train: [62] [ 500/6250] eta: 0:16:53 lr: 0.000043 grad: 0.2201 (0.3201) loss: 0.6671 (0.6770) time: 0.1691 data: 0.0689 max mem: 9377 +Train: [62] [ 600/6250] eta: 0:16:17 lr: 0.000043 grad: 0.1939 (0.3015) loss: 0.6694 (0.6760) time: 0.1720 data: 0.0872 max mem: 9377 +Train: [62] [ 700/6250] eta: 0:15:44 lr: 0.000043 grad: 0.1970 (0.2869) loss: 0.6699 (0.6756) time: 0.1368 data: 0.0261 max mem: 9377 +Train: [62] [ 800/6250] eta: 0:15:35 lr: 0.000043 grad: 0.1861 (0.2757) loss: 0.6663 (0.6747) time: 0.2023 data: 0.1075 max mem: 9377 +Train: [62] [ 900/6250] eta: 0:15:19 lr: 0.000043 grad: 0.1950 (0.2670) loss: 0.6549 (0.6739) time: 0.2085 data: 0.1075 max mem: 9377 +Train: [62] [1000/6250] eta: 0:15:06 lr: 0.000043 grad: 0.1914 (0.2602) loss: 0.6691 (0.6728) time: 0.1971 data: 0.1087 max mem: 9377 +Train: [62] [1100/6250] eta: 0:14:53 lr: 0.000043 grad: 0.1862 (0.2541) loss: 0.6610 (0.6719) time: 0.1955 data: 0.0962 max mem: 9377 +Train: [62] [1200/6250] eta: 0:14:36 lr: 0.000043 grad: 0.1924 (0.2491) loss: 0.6541 (0.6715) time: 0.1859 data: 0.0917 max mem: 9377 +Train: [62] [1300/6250] eta: 0:14:14 lr: 0.000043 grad: 0.1906 (0.2449) loss: 0.6517 (0.6709) time: 0.1656 data: 0.0751 max mem: 9377 +Train: [62] [1400/6250] eta: 0:13:50 lr: 0.000043 grad: 0.1812 (0.2407) loss: 0.6660 (0.6706) time: 0.1552 data: 0.0549 max mem: 9377 +Train: [62] [1500/6250] eta: 0:13:30 lr: 0.000043 grad: 0.1799 (0.2372) loss: 0.6703 (0.6705) time: 0.1566 data: 0.0620 max mem: 9377 +Train: [62] [1600/6250] eta: 0:13:10 lr: 0.000043 grad: 0.1813 (0.2341) loss: 0.6683 (0.6703) time: 0.1720 data: 0.0921 max mem: 9377 +Train: [62] [1700/6250] eta: 0:12:56 lr: 0.000043 grad: 0.1838 (0.2314) loss: 0.6630 (0.6699) time: 0.1188 data: 0.0294 max mem: 9377 +Train: [62] [1800/6250] eta: 0:12:34 lr: 0.000043 grad: 0.1854 (0.2289) loss: 0.6769 (0.6696) time: 0.1470 data: 0.0518 max mem: 9377 +Train: [62] [1900/6250] eta: 0:12:13 lr: 0.000043 grad: 0.1853 (0.2267) loss: 0.6766 (0.6696) time: 0.1510 data: 0.0578 max mem: 9377 +Train: [62] [2000/6250] eta: 0:11:54 lr: 0.000043 grad: 0.1949 (0.2254) loss: 0.6677 (0.6694) time: 0.1250 data: 0.0351 max mem: 9377 +Train: [62] [2100/6250] eta: 0:11:36 lr: 0.000043 grad: 0.1771 (0.2240) loss: 0.6772 (0.6692) time: 0.1807 data: 0.0931 max mem: 9377 +Train: [62] [2200/6250] eta: 0:11:23 lr: 0.000042 grad: 0.1841 (0.2225) loss: 0.6685 (0.6691) time: 0.2218 data: 0.1335 max mem: 9377 +Train: [62] [2300/6250] eta: 0:11:09 lr: 0.000042 grad: 0.1928 (0.2211) loss: 0.6525 (0.6692) time: 0.1831 data: 0.1004 max mem: 9377 +Train: [62] [2400/6250] eta: 0:10:53 lr: 0.000042 grad: 0.1849 (0.2199) loss: 0.6742 (0.6691) time: 0.1545 data: 0.0699 max mem: 9377 +Train: [62] [2500/6250] eta: 0:10:37 lr: 0.000042 grad: 0.1875 (0.2188) loss: 0.6699 (0.6690) time: 0.1680 data: 0.0687 max mem: 9377 +Train: [62] [2600/6250] eta: 0:10:20 lr: 0.000042 grad: 0.1881 (0.2177) loss: 0.6635 (0.6690) time: 0.1801 data: 0.0841 max mem: 9377 +Train: [62] [2700/6250] eta: 0:10:04 lr: 0.000042 grad: 0.1864 (0.2169) loss: 0.6544 (0.6690) time: 0.1748 data: 0.0799 max mem: 9377 +Train: [62] [2800/6250] eta: 0:09:47 lr: 0.000042 grad: 0.1841 (0.2158) loss: 0.6719 (0.6691) time: 0.1826 data: 0.0902 max mem: 9377 +Train: [62] [2900/6250] eta: 0:09:30 lr: 0.000042 grad: 0.1822 (0.2148) loss: 0.6649 (0.6693) time: 0.1424 data: 0.0456 max mem: 9377 +Train: [62] [3000/6250] eta: 0:09:12 lr: 0.000042 grad: 0.1844 (0.2139) loss: 0.6716 (0.6694) time: 0.1693 data: 0.0814 max mem: 9377 +Train: [62] [3100/6250] eta: 0:08:54 lr: 0.000042 grad: 0.1812 (0.2131) loss: 0.6745 (0.6695) time: 0.1658 data: 0.0751 max mem: 9377 +Train: [62] [3200/6250] eta: 0:08:35 lr: 0.000042 grad: 0.1858 (0.2126) loss: 0.6772 (0.6697) time: 0.1460 data: 0.0575 max mem: 9377 +Train: [62] [3300/6250] eta: 0:08:18 lr: 0.000042 grad: 0.1820 (0.2117) loss: 0.6812 (0.6699) time: 0.1744 data: 0.0841 max mem: 9377 +Train: [62] [3400/6250] eta: 0:08:02 lr: 0.000042 grad: 0.1852 (0.2111) loss: 0.6733 (0.6698) time: 0.2338 data: 0.1457 max mem: 9377 +Train: [62] [3500/6250] eta: 0:07:43 lr: 0.000042 grad: 0.1849 (0.2105) loss: 0.6649 (0.6698) time: 0.1707 data: 0.0828 max mem: 9377 +Train: [62] [3600/6250] eta: 0:07:25 lr: 0.000042 grad: 0.1833 (0.2099) loss: 0.6636 (0.6698) time: 0.1578 data: 0.0717 max mem: 9377 +Train: [62] [3700/6250] eta: 0:07:09 lr: 0.000042 grad: 0.1840 (0.2094) loss: 0.6740 (0.6698) time: 0.1822 data: 0.0973 max mem: 9377 +Train: [62] [3800/6250] eta: 0:06:52 lr: 0.000042 grad: 0.1871 (0.2088) loss: 0.6609 (0.6698) time: 0.2220 data: 0.1452 max mem: 9377 +Train: [62] [3900/6250] eta: 0:06:36 lr: 0.000042 grad: 0.1785 (0.2083) loss: 0.6663 (0.6697) time: 0.1748 data: 0.0872 max mem: 9377 +Train: [62] [4000/6250] eta: 0:06:18 lr: 0.000042 grad: 0.1790 (0.2078) loss: 0.6737 (0.6696) time: 0.1639 data: 0.0802 max mem: 9377 +Train: [62] [4100/6250] eta: 0:06:00 lr: 0.000042 grad: 0.1792 (0.2073) loss: 0.6718 (0.6696) time: 0.1452 data: 0.0655 max mem: 9377 +Train: [62] [4200/6250] eta: 0:05:43 lr: 0.000042 grad: 0.1802 (0.2068) loss: 0.6730 (0.6696) time: 0.1395 data: 0.0439 max mem: 9377 +Train: [62] [4300/6250] eta: 0:05:27 lr: 0.000042 grad: 0.1850 (0.2065) loss: 0.6722 (0.6695) time: 0.1419 data: 0.0503 max mem: 9377 +Train: [62] [4400/6250] eta: 0:05:09 lr: 0.000042 grad: 0.1802 (0.2061) loss: 0.6809 (0.6696) time: 0.1682 data: 0.0726 max mem: 9377 +Train: [62] [4500/6250] eta: 0:04:52 lr: 0.000042 grad: 0.1847 (0.2057) loss: 0.6824 (0.6696) time: 0.1592 data: 0.0662 max mem: 9377 +Train: [62] [4600/6250] eta: 0:04:35 lr: 0.000042 grad: 0.1863 (0.2054) loss: 0.6701 (0.6697) time: 0.1783 data: 0.0864 max mem: 9377 +Train: [62] [4700/6250] eta: 0:04:18 lr: 0.000042 grad: 0.1856 (0.2051) loss: 0.6770 (0.6698) time: 0.1628 data: 0.0716 max mem: 9377 +Train: [62] [4800/6250] eta: 0:04:01 lr: 0.000042 grad: 0.1929 (0.2049) loss: 0.6767 (0.6698) time: 0.1485 data: 0.0541 max mem: 9377 +Train: [62] [4900/6250] eta: 0:03:44 lr: 0.000042 grad: 0.1823 (0.2047) loss: 0.6740 (0.6700) time: 0.1508 data: 0.0606 max mem: 9377 +Train: [62] [5000/6250] eta: 0:03:27 lr: 0.000042 grad: 0.1809 (0.2043) loss: 0.6904 (0.6701) time: 0.1320 data: 0.0434 max mem: 9377 +Train: [62] [5100/6250] eta: 0:03:10 lr: 0.000042 grad: 0.1780 (0.2040) loss: 0.6833 (0.6703) time: 0.1413 data: 0.0517 max mem: 9377 +Train: [62] [5200/6250] eta: 0:02:53 lr: 0.000042 grad: 0.1886 (0.2037) loss: 0.6640 (0.6704) time: 0.1402 data: 0.0606 max mem: 9377 +Train: [62] [5300/6250] eta: 0:02:37 lr: 0.000042 grad: 0.1829 (0.2034) loss: 0.6797 (0.6706) time: 0.1443 data: 0.0512 max mem: 9377 +Train: [62] [5400/6250] eta: 0:02:20 lr: 0.000041 grad: 0.1866 (0.2032) loss: 0.6799 (0.6707) time: 0.1647 data: 0.0776 max mem: 9377 +Train: [62] [5500/6250] eta: 0:02:03 lr: 0.000041 grad: 0.1827 (0.2030) loss: 0.6764 (0.6708) time: 0.1593 data: 0.0596 max mem: 9377 +Train: [62] [5600/6250] eta: 0:01:47 lr: 0.000041 grad: 0.1839 (0.2027) loss: 0.6760 (0.6709) time: 0.1456 data: 0.0539 max mem: 9377 +Train: [62] [5700/6250] eta: 0:01:30 lr: 0.000041 grad: 0.1789 (0.2024) loss: 0.6877 (0.6710) time: 0.1458 data: 0.0530 max mem: 9377 +Train: [62] [5800/6250] eta: 0:01:13 lr: 0.000041 grad: 0.1890 (0.2022) loss: 0.6581 (0.6711) time: 0.1614 data: 0.0769 max mem: 9377 +Train: [62] [5900/6250] eta: 0:00:57 lr: 0.000041 grad: 0.1839 (0.2021) loss: 0.6636 (0.6711) time: 0.1293 data: 0.0425 max mem: 9377 +Train: [62] [6000/6250] eta: 0:00:41 lr: 0.000041 grad: 0.1785 (0.2019) loss: 0.6876 (0.6711) time: 0.1454 data: 0.0602 max mem: 9377 +Train: [62] [6100/6250] eta: 0:00:24 lr: 0.000041 grad: 0.1851 (0.2016) loss: 0.6868 (0.6713) time: 0.1616 data: 0.0830 max mem: 9377 +Train: [62] [6200/6250] eta: 0:00:08 lr: 0.000041 grad: 0.1805 (0.2014) loss: 0.6878 (0.6714) time: 0.1631 data: 0.0767 max mem: 9377 +Train: [62] [6249/6250] eta: 0:00:00 lr: 0.000041 grad: 0.1843 (0.2013) loss: 0.6730 (0.6714) time: 0.1418 data: 0.0500 max mem: 9377 +Train: [62] Total time: 0:17:10 (0.1648 s / it) +Averaged stats: lr: 0.000041 grad: 0.1843 (0.2013) loss: 0.6730 (0.6714) +Eval (hcp-train-subset): [62] [ 0/62] eta: 0:05:12 loss: 0.8831 (0.8831) time: 5.0454 data: 4.9990 max mem: 9377 +Eval (hcp-train-subset): [62] [61/62] eta: 0:00:00 loss: 0.9004 (0.9013) time: 0.1546 data: 0.1294 max mem: 9377 +Eval (hcp-train-subset): [62] Total time: 0:00:14 (0.2404 s / it) +Averaged stats (hcp-train-subset): loss: 0.9004 (0.9013) +Eval (hcp-val): [62] [ 0/62] eta: 0:06:28 loss: 0.8988 (0.8988) time: 6.2728 data: 6.2432 max mem: 9377 +Eval (hcp-val): [62] [61/62] eta: 0:00:00 loss: 0.8991 (0.9013) time: 0.1112 data: 0.0858 max mem: 9377 +Eval (hcp-val): [62] Total time: 0:00:14 (0.2415 s / it) +Averaged stats (hcp-val): loss: 0.8991 (0.9013) +Eval (nsd-val): [62] [ 0/62] eta: 0:04:22 loss: 0.8890 (0.8890) time: 4.2300 data: 4.1661 max mem: 9377 +Eval (nsd-val): [62] [61/62] eta: 0:00:00 loss: 0.8944 (0.8951) time: 0.1181 data: 0.0930 max mem: 9377 +Eval (nsd-val): [62] Total time: 0:00:14 (0.2380 s / it) +Averaged stats (nsd-val): loss: 0.8944 (0.8951) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [63] [ 0/6250] eta: 10:48:07 lr: 0.000041 grad: 0.2385 (0.2385) loss: 0.7498 (0.7498) time: 6.2220 data: 6.0951 max mem: 9377 +Train: [63] [ 100/6250] eta: 0:22:32 lr: 0.000041 grad: 0.3705 (0.4066) loss: 0.6447 (0.6807) time: 0.1728 data: 0.0638 max mem: 9377 +Train: [63] [ 200/6250] eta: 0:19:48 lr: 0.000041 grad: 0.3121 (0.3851) loss: 0.6913 (0.6785) time: 0.1844 data: 0.0782 max mem: 9377 +Train: [63] [ 300/6250] eta: 0:18:50 lr: 0.000041 grad: 0.2340 (0.3424) loss: 0.6768 (0.6786) time: 0.1716 data: 0.0629 max mem: 9377 +Train: [63] [ 400/6250] eta: 0:18:19 lr: 0.000041 grad: 0.2472 (0.3224) loss: 0.6694 (0.6760) time: 0.1569 data: 0.0515 max mem: 9377 +Train: [63] [ 500/6250] eta: 0:17:38 lr: 0.000041 grad: 0.2300 (0.3072) loss: 0.6461 (0.6743) time: 0.1648 data: 0.0596 max mem: 9377 +Train: [63] [ 600/6250] eta: 0:16:56 lr: 0.000041 grad: 0.2278 (0.2943) loss: 0.6699 (0.6729) time: 0.1587 data: 0.0633 max mem: 9377 +Train: [63] [ 700/6250] eta: 0:16:33 lr: 0.000041 grad: 0.1988 (0.2817) loss: 0.6659 (0.6717) time: 0.1422 data: 0.0443 max mem: 9377 +Train: [63] [ 800/6250] eta: 0:16:04 lr: 0.000041 grad: 0.2004 (0.2722) loss: 0.6654 (0.6695) time: 0.1679 data: 0.0741 max mem: 9377 +Train: [63] [ 900/6250] eta: 0:15:45 lr: 0.000041 grad: 0.2002 (0.2642) loss: 0.6489 (0.6684) time: 0.1812 data: 0.0819 max mem: 9377 +Train: [63] [1000/6250] eta: 0:15:17 lr: 0.000041 grad: 0.1990 (0.2571) loss: 0.6513 (0.6675) time: 0.1685 data: 0.0742 max mem: 9377 +Train: [63] [1100/6250] eta: 0:14:53 lr: 0.000041 grad: 0.1933 (0.2513) loss: 0.6605 (0.6663) time: 0.1838 data: 0.0887 max mem: 9377 +Train: [63] [1200/6250] eta: 0:14:32 lr: 0.000041 grad: 0.2001 (0.2469) loss: 0.6516 (0.6652) time: 0.1902 data: 0.1052 max mem: 9377 +Train: [63] [1300/6250] eta: 0:14:09 lr: 0.000041 grad: 0.1892 (0.2428) loss: 0.6476 (0.6646) time: 0.1666 data: 0.0818 max mem: 9377 +Train: [63] [1400/6250] eta: 0:13:47 lr: 0.000041 grad: 0.1840 (0.2392) loss: 0.6590 (0.6642) time: 0.1422 data: 0.0427 max mem: 9377 +Train: [63] [1500/6250] eta: 0:13:26 lr: 0.000041 grad: 0.1892 (0.2359) loss: 0.6600 (0.6639) time: 0.1630 data: 0.0779 max mem: 9377 +Train: [63] [1600/6250] eta: 0:13:03 lr: 0.000041 grad: 0.1805 (0.2328) loss: 0.6624 (0.6639) time: 0.1355 data: 0.0395 max mem: 9377 +Train: [63] [1700/6250] eta: 0:12:43 lr: 0.000041 grad: 0.1810 (0.2301) loss: 0.6541 (0.6634) time: 0.1379 data: 0.0431 max mem: 9377 +Train: [63] [1800/6250] eta: 0:12:23 lr: 0.000041 grad: 0.1835 (0.2278) loss: 0.6699 (0.6633) time: 0.1588 data: 0.0673 max mem: 9377 +Train: [63] [1900/6250] eta: 0:12:05 lr: 0.000041 grad: 0.1767 (0.2261) loss: 0.6722 (0.6634) time: 0.1544 data: 0.0696 max mem: 9377 +Train: [63] [2000/6250] eta: 0:11:47 lr: 0.000041 grad: 0.1873 (0.2243) loss: 0.6600 (0.6633) time: 0.1840 data: 0.0918 max mem: 9377 +Train: [63] [2100/6250] eta: 0:11:29 lr: 0.000041 grad: 0.1890 (0.2227) loss: 0.6494 (0.6633) time: 0.1568 data: 0.0745 max mem: 9377 +Train: [63] [2200/6250] eta: 0:11:14 lr: 0.000041 grad: 0.1863 (0.2212) loss: 0.6542 (0.6636) time: 0.1602 data: 0.0749 max mem: 9377 +Train: [63] [2300/6250] eta: 0:10:59 lr: 0.000041 grad: 0.1880 (0.2199) loss: 0.6685 (0.6638) time: 0.1980 data: 0.1137 max mem: 9377 +Train: [63] [2400/6250] eta: 0:10:43 lr: 0.000040 grad: 0.1911 (0.2190) loss: 0.6594 (0.6639) time: 0.1669 data: 0.0832 max mem: 9377 +Train: [63] [2500/6250] eta: 0:10:27 lr: 0.000040 grad: 0.1902 (0.2180) loss: 0.6428 (0.6638) time: 0.1473 data: 0.0677 max mem: 9377 +Train: [63] [2600/6250] eta: 0:10:10 lr: 0.000040 grad: 0.1848 (0.2170) loss: 0.6532 (0.6637) time: 0.1763 data: 0.0809 max mem: 9377 +Train: [63] [2700/6250] eta: 0:09:53 lr: 0.000040 grad: 0.1853 (0.2159) loss: 0.6703 (0.6639) time: 0.1547 data: 0.0487 max mem: 9377 +Train: [63] [2800/6250] eta: 0:09:35 lr: 0.000040 grad: 0.1837 (0.2151) loss: 0.6627 (0.6640) time: 0.1787 data: 0.0889 max mem: 9377 +Train: [63] [2900/6250] eta: 0:09:17 lr: 0.000040 grad: 0.1818 (0.2140) loss: 0.6679 (0.6642) time: 0.1609 data: 0.0697 max mem: 9377 +Train: [63] [3000/6250] eta: 0:09:00 lr: 0.000040 grad: 0.1845 (0.2131) loss: 0.6638 (0.6643) time: 0.1623 data: 0.0714 max mem: 9377 +Train: [63] [3100/6250] eta: 0:08:42 lr: 0.000040 grad: 0.1827 (0.2125) loss: 0.6709 (0.6644) time: 0.1390 data: 0.0352 max mem: 9377 +Train: [63] [3200/6250] eta: 0:08:24 lr: 0.000040 grad: 0.1864 (0.2118) loss: 0.6667 (0.6645) time: 0.1561 data: 0.0650 max mem: 9377 +Train: [63] [3300/6250] eta: 0:08:07 lr: 0.000040 grad: 0.1812 (0.2111) loss: 0.6795 (0.6647) time: 0.1621 data: 0.0750 max mem: 9377 +Train: [63] [3400/6250] eta: 0:07:50 lr: 0.000040 grad: 0.1861 (0.2104) loss: 0.6687 (0.6648) time: 0.1229 data: 0.0159 max mem: 9377 +Train: [63] [3500/6250] eta: 0:07:33 lr: 0.000040 grad: 0.1839 (0.2101) loss: 0.6696 (0.6649) time: 0.1617 data: 0.0813 max mem: 9377 +Train: [63] [3600/6250] eta: 0:07:17 lr: 0.000040 grad: 0.1858 (0.2096) loss: 0.6526 (0.6648) time: 0.1314 data: 0.0414 max mem: 9377 +Train: [63] [3700/6250] eta: 0:06:59 lr: 0.000040 grad: 0.1903 (0.2090) loss: 0.6593 (0.6648) time: 0.1457 data: 0.0563 max mem: 9377 +Train: [63] [3800/6250] eta: 0:06:42 lr: 0.000040 grad: 0.1788 (0.2086) loss: 0.6635 (0.6648) time: 0.1567 data: 0.0733 max mem: 9377 +Train: [63] [3900/6250] eta: 0:06:26 lr: 0.000040 grad: 0.1829 (0.2082) loss: 0.6726 (0.6648) time: 0.1660 data: 0.0817 max mem: 9377 +Train: [63] [4000/6250] eta: 0:06:10 lr: 0.000040 grad: 0.1854 (0.2077) loss: 0.6736 (0.6648) time: 0.1394 data: 0.0545 max mem: 9377 +Train: [63] [4100/6250] eta: 0:05:54 lr: 0.000040 grad: 0.1863 (0.2073) loss: 0.6592 (0.6650) time: 0.1734 data: 0.0924 max mem: 9377 +Train: [63] [4200/6250] eta: 0:05:38 lr: 0.000040 grad: 0.1843 (0.2070) loss: 0.6736 (0.6651) time: 0.1709 data: 0.0788 max mem: 9377 +Train: [63] [4300/6250] eta: 0:05:23 lr: 0.000040 grad: 0.1885 (0.2067) loss: 0.6727 (0.6653) time: 0.1868 data: 0.0935 max mem: 9377 +Train: [63] [4400/6250] eta: 0:05:07 lr: 0.000040 grad: 0.1882 (0.2065) loss: 0.6638 (0.6654) time: 0.2064 data: 0.1250 max mem: 9377 +Train: [63] [4500/6250] eta: 0:04:50 lr: 0.000040 grad: 0.1982 (0.2062) loss: 0.6670 (0.6655) time: 0.1454 data: 0.0499 max mem: 9377 +Train: [63] [4600/6250] eta: 0:04:33 lr: 0.000040 grad: 0.1862 (0.2060) loss: 0.6670 (0.6655) time: 0.1437 data: 0.0472 max mem: 9377 +Train: [63] [4700/6250] eta: 0:04:16 lr: 0.000040 grad: 0.1864 (0.2057) loss: 0.6601 (0.6656) time: 0.1459 data: 0.0541 max mem: 9377 +Train: [63] [4800/6250] eta: 0:04:00 lr: 0.000040 grad: 0.1857 (0.2054) loss: 0.6652 (0.6656) time: 0.1823 data: 0.0859 max mem: 9377 +Train: [63] [4900/6250] eta: 0:03:43 lr: 0.000040 grad: 0.1923 (0.2051) loss: 0.6673 (0.6657) time: 0.1750 data: 0.0847 max mem: 9377 +Train: [63] [5000/6250] eta: 0:03:26 lr: 0.000040 grad: 0.1890 (0.2048) loss: 0.6630 (0.6658) time: 0.1600 data: 0.0675 max mem: 9377 +Train: [63] [5100/6250] eta: 0:03:10 lr: 0.000040 grad: 0.1872 (0.2046) loss: 0.6664 (0.6658) time: 0.1685 data: 0.0778 max mem: 9377 +Train: [63] [5200/6250] eta: 0:02:53 lr: 0.000040 grad: 0.1827 (0.2044) loss: 0.6728 (0.6658) time: 0.1659 data: 0.0846 max mem: 9377 +Train: [63] [5300/6250] eta: 0:02:37 lr: 0.000040 grad: 0.1889 (0.2041) loss: 0.6677 (0.6658) time: 0.1632 data: 0.0742 max mem: 9377 +Train: [63] [5400/6250] eta: 0:02:20 lr: 0.000040 grad: 0.1842 (0.2038) loss: 0.6779 (0.6658) time: 0.1553 data: 0.0626 max mem: 9377 +Train: [63] [5500/6250] eta: 0:02:03 lr: 0.000040 grad: 0.1853 (0.2036) loss: 0.6632 (0.6658) time: 0.1221 data: 0.0257 max mem: 9377 +Train: [63] [5600/6250] eta: 0:01:47 lr: 0.000039 grad: 0.1897 (0.2034) loss: 0.6649 (0.6657) time: 0.1628 data: 0.0705 max mem: 9377 +Train: [63] [5700/6250] eta: 0:01:30 lr: 0.000039 grad: 0.1945 (0.2033) loss: 0.6583 (0.6657) time: 0.1505 data: 0.0648 max mem: 9377 +Train: [63] [5800/6250] eta: 0:01:14 lr: 0.000039 grad: 0.1916 (0.2031) loss: 0.6667 (0.6657) time: 0.1812 data: 0.0977 max mem: 9377 +Train: [63] [5900/6250] eta: 0:00:57 lr: 0.000039 grad: 0.1870 (0.2030) loss: 0.6688 (0.6657) time: 0.1676 data: 0.0771 max mem: 9377 +Train: [63] [6000/6250] eta: 0:00:41 lr: 0.000039 grad: 0.1846 (0.2029) loss: 0.6737 (0.6657) time: 0.1776 data: 0.0972 max mem: 9377 +Train: [63] [6100/6250] eta: 0:00:24 lr: 0.000039 grad: 0.1839 (0.2027) loss: 0.6757 (0.6658) time: 0.1787 data: 0.0884 max mem: 9377 +Train: [63] [6200/6250] eta: 0:00:08 lr: 0.000039 grad: 0.1863 (0.2026) loss: 0.6714 (0.6658) time: 0.1749 data: 0.0875 max mem: 9377 +Train: [63] [6249/6250] eta: 0:00:00 lr: 0.000039 grad: 0.1894 (0.2025) loss: 0.6756 (0.6658) time: 0.1543 data: 0.0610 max mem: 9377 +Train: [63] Total time: 0:17:16 (0.1658 s / it) +Averaged stats: lr: 0.000039 grad: 0.1894 (0.2025) loss: 0.6756 (0.6658) +Eval (hcp-train-subset): [63] [ 0/62] eta: 0:03:50 loss: 0.8885 (0.8885) time: 3.7209 data: 3.6715 max mem: 9377 +Eval (hcp-train-subset): [63] [61/62] eta: 0:00:00 loss: 0.9013 (0.9021) time: 0.1603 data: 0.1349 max mem: 9377 +Eval (hcp-train-subset): [63] Total time: 0:00:16 (0.2595 s / it) +Averaged stats (hcp-train-subset): loss: 0.9013 (0.9021) +Eval (hcp-val): [63] [ 0/62] eta: 0:05:25 loss: 0.9049 (0.9049) time: 5.2548 data: 5.1664 max mem: 9377 +Eval (hcp-val): [63] [61/62] eta: 0:00:00 loss: 0.8987 (0.9020) time: 0.1346 data: 0.1092 max mem: 9377 +Eval (hcp-val): [63] Total time: 0:00:15 (0.2487 s / it) +Averaged stats (hcp-val): loss: 0.8987 (0.9020) +Eval (nsd-val): [63] [ 0/62] eta: 0:05:08 loss: 0.8919 (0.8919) time: 4.9803 data: 4.8944 max mem: 9377 +Eval (nsd-val): [63] [61/62] eta: 0:00:00 loss: 0.8992 (0.9019) time: 0.1589 data: 0.1333 max mem: 9377 +Eval (nsd-val): [63] Total time: 0:00:15 (0.2443 s / it) +Averaged stats (nsd-val): loss: 0.8992 (0.9019) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [64] [ 0/6250] eta: 11:16:13 lr: 0.000039 grad: 0.2420 (0.2420) loss: 0.7458 (0.7458) time: 6.4917 data: 6.3854 max mem: 9377 +Train: [64] [ 100/6250] eta: 0:23:51 lr: 0.000039 grad: 0.3264 (0.3352) loss: 0.6924 (0.7123) time: 0.1884 data: 0.0775 max mem: 9377 +Train: [64] [ 200/6250] eta: 0:20:34 lr: 0.000039 grad: 0.3297 (0.3346) loss: 0.6958 (0.6982) time: 0.1718 data: 0.0613 max mem: 9377 +Train: [64] [ 300/6250] eta: 0:19:05 lr: 0.000039 grad: 0.2767 (0.3230) loss: 0.6878 (0.6922) time: 0.1748 data: 0.0779 max mem: 9377 +Train: [64] [ 400/6250] eta: 0:18:05 lr: 0.000039 grad: 0.2585 (0.3082) loss: 0.6708 (0.6880) time: 0.1754 data: 0.0724 max mem: 9377 +Train: [64] [ 500/6250] eta: 0:17:19 lr: 0.000039 grad: 0.2343 (0.2959) loss: 0.6716 (0.6841) time: 0.1578 data: 0.0735 max mem: 9377 +Train: [64] [ 600/6250] eta: 0:16:30 lr: 0.000039 grad: 0.2019 (0.2825) loss: 0.6816 (0.6826) time: 0.1584 data: 0.0632 max mem: 9377 +Train: [64] [ 700/6250] eta: 0:15:57 lr: 0.000039 grad: 0.2055 (0.2733) loss: 0.6702 (0.6811) time: 0.1498 data: 0.0574 max mem: 9377 +Train: [64] [ 800/6250] eta: 0:15:41 lr: 0.000039 grad: 0.1972 (0.2642) loss: 0.6792 (0.6798) time: 0.1725 data: 0.0862 max mem: 9377 +Train: [64] [ 900/6250] eta: 0:15:24 lr: 0.000039 grad: 0.1917 (0.2565) loss: 0.6731 (0.6791) time: 0.1673 data: 0.0709 max mem: 9377 +Train: [64] [1000/6250] eta: 0:14:56 lr: 0.000039 grad: 0.1843 (0.2502) loss: 0.6702 (0.6786) time: 0.1450 data: 0.0422 max mem: 9377 +Train: [64] [1100/6250] eta: 0:14:35 lr: 0.000039 grad: 0.1891 (0.2452) loss: 0.6618 (0.6773) time: 0.1619 data: 0.0691 max mem: 9377 +Train: [64] [1200/6250] eta: 0:14:12 lr: 0.000039 grad: 0.1797 (0.2407) loss: 0.6583 (0.6767) time: 0.1578 data: 0.0648 max mem: 9377 +Train: [64] [1300/6250] eta: 0:13:55 lr: 0.000039 grad: 0.1946 (0.2369) loss: 0.6600 (0.6760) time: 0.1667 data: 0.0673 max mem: 9377 +Train: [64] [1400/6250] eta: 0:13:37 lr: 0.000039 grad: 0.1893 (0.2336) loss: 0.6722 (0.6752) time: 0.1709 data: 0.0817 max mem: 9377 +Train: [64] [1500/6250] eta: 0:13:16 lr: 0.000039 grad: 0.1947 (0.2309) loss: 0.6706 (0.6749) time: 0.1438 data: 0.0447 max mem: 9377 +Train: [64] [1600/6250] eta: 0:12:57 lr: 0.000039 grad: 0.1822 (0.2285) loss: 0.6846 (0.6749) time: 0.1416 data: 0.0546 max mem: 9377 +Train: [64] [1700/6250] eta: 0:12:40 lr: 0.000039 grad: 0.1900 (0.2263) loss: 0.6868 (0.6752) time: 0.1819 data: 0.1040 max mem: 9377 +Train: [64] [1800/6250] eta: 0:12:20 lr: 0.000039 grad: 0.1820 (0.2243) loss: 0.6663 (0.6750) time: 0.1103 data: 0.0200 max mem: 9377 +Train: [64] [1900/6250] eta: 0:12:01 lr: 0.000039 grad: 0.1818 (0.2226) loss: 0.6776 (0.6748) time: 0.1467 data: 0.0538 max mem: 9377 +Train: [64] [2000/6250] eta: 0:11:42 lr: 0.000039 grad: 0.1900 (0.2209) loss: 0.6630 (0.6748) time: 0.1630 data: 0.0799 max mem: 9377 +Train: [64] [2100/6250] eta: 0:11:26 lr: 0.000039 grad: 0.1940 (0.2196) loss: 0.6647 (0.6746) time: 0.1782 data: 0.0857 max mem: 9377 +Train: [64] [2200/6250] eta: 0:11:10 lr: 0.000039 grad: 0.1869 (0.2182) loss: 0.6761 (0.6746) time: 0.1669 data: 0.0816 max mem: 9377 +Train: [64] [2300/6250] eta: 0:10:53 lr: 0.000039 grad: 0.1848 (0.2170) loss: 0.6729 (0.6748) time: 0.1542 data: 0.0512 max mem: 9377 +Train: [64] [2400/6250] eta: 0:10:36 lr: 0.000039 grad: 0.1838 (0.2162) loss: 0.6662 (0.6748) time: 0.1874 data: 0.1099 max mem: 9377 +Train: [64] [2500/6250] eta: 0:10:19 lr: 0.000039 grad: 0.1916 (0.2155) loss: 0.6483 (0.6744) time: 0.1643 data: 0.0734 max mem: 9377 +Train: [64] [2600/6250] eta: 0:10:02 lr: 0.000039 grad: 0.1895 (0.2146) loss: 0.6633 (0.6742) time: 0.1541 data: 0.0571 max mem: 9377 +Train: [64] [2700/6250] eta: 0:09:45 lr: 0.000038 grad: 0.1806 (0.2140) loss: 0.6543 (0.6738) time: 0.1654 data: 0.0721 max mem: 9377 +Train: [64] [2800/6250] eta: 0:09:29 lr: 0.000038 grad: 0.1848 (0.2132) loss: 0.6738 (0.6735) time: 0.1392 data: 0.0452 max mem: 9377 +Train: [64] [2900/6250] eta: 0:09:11 lr: 0.000038 grad: 0.1870 (0.2124) loss: 0.6663 (0.6735) time: 0.1683 data: 0.0823 max mem: 9377 +Train: [64] [3000/6250] eta: 0:08:53 lr: 0.000038 grad: 0.1865 (0.2116) loss: 0.6722 (0.6735) time: 0.1418 data: 0.0529 max mem: 9377 +Train: [64] [3100/6250] eta: 0:08:35 lr: 0.000038 grad: 0.1899 (0.2110) loss: 0.6646 (0.6735) time: 0.1698 data: 0.0717 max mem: 9377 +Train: [64] [3200/6250] eta: 0:08:18 lr: 0.000038 grad: 0.1825 (0.2104) loss: 0.6709 (0.6734) time: 0.1584 data: 0.0587 max mem: 9377 +Train: [64] [3300/6250] eta: 0:07:59 lr: 0.000038 grad: 0.1852 (0.2100) loss: 0.6704 (0.6733) time: 0.1377 data: 0.0384 max mem: 9377 +Train: [64] [3400/6250] eta: 0:07:42 lr: 0.000038 grad: 0.1865 (0.2094) loss: 0.6712 (0.6731) time: 0.1399 data: 0.0377 max mem: 9377 +Train: [64] [3500/6250] eta: 0:07:25 lr: 0.000038 grad: 0.1770 (0.2088) loss: 0.6861 (0.6732) time: 0.1538 data: 0.0631 max mem: 9377 +Train: [64] [3600/6250] eta: 0:07:09 lr: 0.000038 grad: 0.1860 (0.2083) loss: 0.6675 (0.6732) time: 0.1705 data: 0.0826 max mem: 9377 +Train: [64] [3700/6250] eta: 0:06:52 lr: 0.000038 grad: 0.1842 (0.2079) loss: 0.6833 (0.6732) time: 0.1327 data: 0.0445 max mem: 9377 +Train: [64] [3800/6250] eta: 0:06:35 lr: 0.000038 grad: 0.1832 (0.2074) loss: 0.6839 (0.6734) time: 0.1444 data: 0.0503 max mem: 9377 +Train: [64] [3900/6250] eta: 0:06:20 lr: 0.000038 grad: 0.1871 (0.2069) loss: 0.6730 (0.6733) time: 0.1806 data: 0.0990 max mem: 9377 +Train: [64] [4000/6250] eta: 0:06:03 lr: 0.000038 grad: 0.1905 (0.2067) loss: 0.6708 (0.6733) time: 0.1578 data: 0.0710 max mem: 9377 +Train: [64] [4100/6250] eta: 0:05:48 lr: 0.000038 grad: 0.1897 (0.2065) loss: 0.6807 (0.6732) time: 0.1726 data: 0.0865 max mem: 9377 +Train: [64] [4200/6250] eta: 0:05:31 lr: 0.000038 grad: 0.1892 (0.2062) loss: 0.6782 (0.6731) time: 0.1676 data: 0.0823 max mem: 9377 +Train: [64] [4300/6250] eta: 0:05:15 lr: 0.000038 grad: 0.1809 (0.2060) loss: 0.6653 (0.6730) time: 0.1551 data: 0.0638 max mem: 9377 +Train: [64] [4400/6250] eta: 0:04:59 lr: 0.000038 grad: 0.1878 (0.2058) loss: 0.6756 (0.6729) time: 0.1603 data: 0.0695 max mem: 9377 +Train: [64] [4500/6250] eta: 0:04:43 lr: 0.000038 grad: 0.1831 (0.2054) loss: 0.6732 (0.6728) time: 0.1484 data: 0.0584 max mem: 9377 +Train: [64] [4600/6250] eta: 0:04:27 lr: 0.000038 grad: 0.1871 (0.2051) loss: 0.6653 (0.6726) time: 0.1632 data: 0.0764 max mem: 9377 +Train: [64] [4700/6250] eta: 0:04:10 lr: 0.000038 grad: 0.1855 (0.2048) loss: 0.6607 (0.6724) time: 0.1453 data: 0.0532 max mem: 9377 +Train: [64] [4800/6250] eta: 0:03:53 lr: 0.000038 grad: 0.1892 (0.2046) loss: 0.6604 (0.6724) time: 0.1496 data: 0.0543 max mem: 9377 +Train: [64] [4900/6250] eta: 0:03:37 lr: 0.000038 grad: 0.1860 (0.2044) loss: 0.6642 (0.6723) time: 0.1471 data: 0.0550 max mem: 9377 +Train: [64] [5000/6250] eta: 0:03:21 lr: 0.000038 grad: 0.1855 (0.2042) loss: 0.6645 (0.6723) time: 0.1593 data: 0.0719 max mem: 9377 +Train: [64] [5100/6250] eta: 0:03:04 lr: 0.000038 grad: 0.1851 (0.2039) loss: 0.6600 (0.6723) time: 0.1466 data: 0.0630 max mem: 9377 +Train: [64] [5200/6250] eta: 0:02:48 lr: 0.000038 grad: 0.1845 (0.2035) loss: 0.6559 (0.6722) time: 0.1689 data: 0.0821 max mem: 9377 +Train: [64] [5300/6250] eta: 0:02:32 lr: 0.000038 grad: 0.1857 (0.2033) loss: 0.6618 (0.6720) time: 0.1630 data: 0.0797 max mem: 9377 +Train: [64] [5400/6250] eta: 0:02:16 lr: 0.000038 grad: 0.1960 (0.2032) loss: 0.6594 (0.6719) time: 0.1682 data: 0.0813 max mem: 9377 +Train: [64] [5500/6250] eta: 0:02:00 lr: 0.000038 grad: 0.1978 (0.2030) loss: 0.6650 (0.6719) time: 0.1654 data: 0.0749 max mem: 9377 +Train: [64] [5600/6250] eta: 0:01:44 lr: 0.000038 grad: 0.1864 (0.2031) loss: 0.6649 (0.6718) time: 0.1630 data: 0.0675 max mem: 9377 +Train: [64] [5700/6250] eta: 0:01:28 lr: 0.000038 grad: 0.1851 (0.2029) loss: 0.6760 (0.6717) time: 0.1736 data: 0.0937 max mem: 9377 +Train: [64] [5800/6250] eta: 0:01:12 lr: 0.000038 grad: 0.1997 (0.2028) loss: 0.6502 (0.6714) time: 0.1692 data: 0.0745 max mem: 9377 +Train: [64] [5900/6250] eta: 0:00:56 lr: 0.000037 grad: 0.1908 (0.2026) loss: 0.6580 (0.6713) time: 0.1295 data: 0.0353 max mem: 9377 +Train: [64] [6000/6250] eta: 0:00:40 lr: 0.000037 grad: 0.1913 (0.2025) loss: 0.6650 (0.6713) time: 0.1404 data: 0.0543 max mem: 9377 +Train: [64] [6100/6250] eta: 0:00:24 lr: 0.000037 grad: 0.1898 (0.2024) loss: 0.6731 (0.6713) time: 0.1663 data: 0.0851 max mem: 9377 +Train: [64] [6200/6250] eta: 0:00:08 lr: 0.000037 grad: 0.1859 (0.2023) loss: 0.6574 (0.6712) time: 0.1491 data: 0.0592 max mem: 9377 +Train: [64] [6249/6250] eta: 0:00:00 lr: 0.000037 grad: 0.1872 (0.2022) loss: 0.6655 (0.6712) time: 0.1502 data: 0.0621 max mem: 9377 +Train: [64] Total time: 0:16:51 (0.1619 s / it) +Averaged stats: lr: 0.000037 grad: 0.1872 (0.2022) loss: 0.6655 (0.6712) +Eval (hcp-train-subset): [64] [ 0/62] eta: 0:06:00 loss: 0.8932 (0.8932) time: 5.8162 data: 5.7870 max mem: 9377 +Eval (hcp-train-subset): [64] [61/62] eta: 0:00:00 loss: 0.9008 (0.9007) time: 0.1467 data: 0.1200 max mem: 9377 +Eval (hcp-train-subset): [64] Total time: 0:00:14 (0.2314 s / it) +Averaged stats (hcp-train-subset): loss: 0.9008 (0.9007) +Making plots (hcp-train-subset): example=59 +Eval (hcp-val): [64] [ 0/62] eta: 0:05:36 loss: 0.8984 (0.8984) time: 5.4241 data: 5.3930 max mem: 9377 +Eval (hcp-val): [64] [61/62] eta: 0:00:00 loss: 0.8999 (0.9016) time: 0.1326 data: 0.1057 max mem: 9377 +Eval (hcp-val): [64] Total time: 0:00:14 (0.2369 s / it) +Averaged stats (hcp-val): loss: 0.8999 (0.9016) +Making plots (hcp-val): example=60 +Eval (nsd-val): [64] [ 0/62] eta: 0:06:30 loss: 0.8927 (0.8927) time: 6.3034 data: 6.2697 max mem: 9377 +Eval (nsd-val): [64] [61/62] eta: 0:00:00 loss: 0.9002 (0.8998) time: 0.1133 data: 0.0881 max mem: 9377 +Eval (nsd-val): [64] Total time: 0:00:14 (0.2409 s / it) +Averaged stats (nsd-val): loss: 0.9002 (0.8998) +Making plots (nsd-val): example=24 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-00064.pth +Train: [65] [ 0/6250] eta: 10:29:37 lr: 0.000037 grad: 0.1438 (0.1438) loss: 0.8557 (0.8557) time: 6.0444 data: 5.8976 max mem: 9377 +Train: [65] [ 100/6250] eta: 0:22:21 lr: 0.000037 grad: 0.3025 (0.3761) loss: 0.7015 (0.7083) time: 0.1562 data: 0.0387 max mem: 9377 +Train: [65] [ 200/6250] eta: 0:19:10 lr: 0.000037 grad: 0.3568 (0.3795) loss: 0.6542 (0.6974) time: 0.1508 data: 0.0360 max mem: 9377 +Train: [65] [ 300/6250] eta: 0:17:49 lr: 0.000037 grad: 0.2547 (0.3501) loss: 0.6796 (0.6936) time: 0.1354 data: 0.0357 max mem: 9377 +Train: [65] [ 400/6250] eta: 0:16:53 lr: 0.000037 grad: 0.2522 (0.3285) loss: 0.6763 (0.6910) time: 0.1688 data: 0.0635 max mem: 9377 +Train: [65] [ 500/6250] eta: 0:16:18 lr: 0.000037 grad: 0.2276 (0.3093) loss: 0.6788 (0.6893) time: 0.1422 data: 0.0523 max mem: 9377 +Train: [65] [ 600/6250] eta: 0:15:49 lr: 0.000037 grad: 0.2282 (0.2978) loss: 0.6826 (0.6870) time: 0.1732 data: 0.0831 max mem: 9377 +Train: [65] [ 700/6250] eta: 0:15:48 lr: 0.000037 grad: 0.2003 (0.2854) loss: 0.6550 (0.6855) time: 0.2108 data: 0.1274 max mem: 9377 +Train: [65] [ 800/6250] eta: 0:15:36 lr: 0.000037 grad: 0.1945 (0.2748) loss: 0.6696 (0.6838) time: 0.2003 data: 0.1099 max mem: 9377 +Train: [65] [ 900/6250] eta: 0:15:26 lr: 0.000037 grad: 0.1849 (0.2668) loss: 0.6663 (0.6829) time: 0.1822 data: 0.0922 max mem: 9377 +Train: [65] [1000/6250] eta: 0:15:08 lr: 0.000037 grad: 0.1846 (0.2596) loss: 0.6802 (0.6824) time: 0.1650 data: 0.0821 max mem: 9377 +Train: [65] [1100/6250] eta: 0:14:55 lr: 0.000037 grad: 0.1936 (0.2536) loss: 0.6701 (0.6818) time: 0.2157 data: 0.1316 max mem: 9377 +Train: [65] [1200/6250] eta: 0:14:39 lr: 0.000037 grad: 0.1948 (0.2488) loss: 0.6655 (0.6812) time: 0.1759 data: 0.0796 max mem: 9377 +Train: [65] [1300/6250] eta: 0:14:22 lr: 0.000037 grad: 0.1820 (0.2447) loss: 0.6821 (0.6809) time: 0.1793 data: 0.0787 max mem: 9377 +Train: [65] [1400/6250] eta: 0:14:04 lr: 0.000037 grad: 0.1794 (0.2408) loss: 0.6850 (0.6806) time: 0.1694 data: 0.0708 max mem: 9377 +Train: [65] [1500/6250] eta: 0:13:44 lr: 0.000037 grad: 0.1854 (0.2374) loss: 0.6716 (0.6805) time: 0.1691 data: 0.0791 max mem: 9377 +Train: [65] [1600/6250] eta: 0:13:20 lr: 0.000037 grad: 0.1853 (0.2343) loss: 0.6665 (0.6801) time: 0.1593 data: 0.0651 max mem: 9377 +Train: [65] [1700/6250] eta: 0:12:57 lr: 0.000037 grad: 0.1838 (0.2317) loss: 0.6810 (0.6799) time: 0.1582 data: 0.0683 max mem: 9377 +Train: [65] [1800/6250] eta: 0:12:36 lr: 0.000037 grad: 0.1812 (0.2293) loss: 0.6850 (0.6798) time: 0.1555 data: 0.0670 max mem: 9377 +Train: [65] [1900/6250] eta: 0:12:17 lr: 0.000037 grad: 0.1867 (0.2272) loss: 0.6557 (0.6797) time: 0.1607 data: 0.0763 max mem: 9377 +Train: [65] [2000/6250] eta: 0:11:58 lr: 0.000037 grad: 0.1831 (0.2256) loss: 0.6841 (0.6796) time: 0.1443 data: 0.0505 max mem: 9377 +Train: [65] [2100/6250] eta: 0:11:39 lr: 0.000037 grad: 0.1851 (0.2241) loss: 0.6839 (0.6797) time: 0.1542 data: 0.0734 max mem: 9377 +Train: [65] [2200/6250] eta: 0:11:21 lr: 0.000037 grad: 0.1950 (0.2226) loss: 0.6677 (0.6795) time: 0.1590 data: 0.0760 max mem: 9377 +Train: [65] [2300/6250] eta: 0:11:04 lr: 0.000037 grad: 0.1848 (0.2215) loss: 0.6801 (0.6792) time: 0.1859 data: 0.0943 max mem: 9377 +Train: [65] [2400/6250] eta: 0:10:47 lr: 0.000037 grad: 0.1860 (0.2204) loss: 0.6732 (0.6789) time: 0.1624 data: 0.0699 max mem: 9377 +Train: [65] [2500/6250] eta: 0:10:30 lr: 0.000037 grad: 0.1811 (0.2192) loss: 0.6777 (0.6789) time: 0.2149 data: 0.1155 max mem: 9377 +Train: [65] [2600/6250] eta: 0:10:14 lr: 0.000037 grad: 0.1821 (0.2183) loss: 0.6805 (0.6787) time: 0.1815 data: 0.0871 max mem: 9377 +Train: [65] [2700/6250] eta: 0:09:59 lr: 0.000037 grad: 0.1888 (0.2175) loss: 0.6739 (0.6786) time: 0.1790 data: 0.0917 max mem: 9377 +Train: [65] [2800/6250] eta: 0:09:43 lr: 0.000037 grad: 0.1890 (0.2168) loss: 0.6742 (0.6783) time: 0.2073 data: 0.1125 max mem: 9377 +Train: [65] [2900/6250] eta: 0:09:25 lr: 0.000037 grad: 0.1885 (0.2164) loss: 0.6900 (0.6783) time: 0.1914 data: 0.1016 max mem: 9377 +Train: [65] [3000/6250] eta: 0:09:07 lr: 0.000036 grad: 0.1886 (0.2156) loss: 0.6752 (0.6783) time: 0.1482 data: 0.0586 max mem: 9377 +Train: [65] [3100/6250] eta: 0:08:50 lr: 0.000036 grad: 0.1833 (0.2148) loss: 0.6897 (0.6786) time: 0.1445 data: 0.0507 max mem: 9377 +Train: [65] [3200/6250] eta: 0:08:33 lr: 0.000036 grad: 0.1833 (0.2143) loss: 0.6727 (0.6785) time: 0.1593 data: 0.0700 max mem: 9377 +Train: [65] [3300/6250] eta: 0:08:15 lr: 0.000036 grad: 0.1902 (0.2136) loss: 0.6796 (0.6786) time: 0.1514 data: 0.0633 max mem: 9377 +Train: [65] [3400/6250] eta: 0:07:57 lr: 0.000036 grad: 0.1941 (0.2130) loss: 0.6594 (0.6786) time: 0.1697 data: 0.0884 max mem: 9377 +Train: [65] [3500/6250] eta: 0:07:40 lr: 0.000036 grad: 0.1906 (0.2124) loss: 0.6754 (0.6786) time: 0.1366 data: 0.0556 max mem: 9377 +Train: [65] [3600/6250] eta: 0:07:23 lr: 0.000036 grad: 0.1960 (0.2121) loss: 0.6683 (0.6786) time: 0.1519 data: 0.0723 max mem: 9377 +Train: [65] [3700/6250] eta: 0:07:05 lr: 0.000036 grad: 0.1812 (0.2113) loss: 0.6806 (0.6785) time: 0.1473 data: 0.0569 max mem: 9377 +Train: [65] [3800/6250] eta: 0:06:48 lr: 0.000036 grad: 0.1921 (0.2109) loss: 0.6743 (0.6784) time: 0.1565 data: 0.0613 max mem: 9377 +Train: [65] [3900/6250] eta: 0:06:31 lr: 0.000036 grad: 0.1853 (0.2106) loss: 0.6841 (0.6784) time: 0.1593 data: 0.0725 max mem: 9377 +Train: [65] [4000/6250] eta: 0:06:14 lr: 0.000036 grad: 0.1856 (0.2102) loss: 0.6891 (0.6783) time: 0.1510 data: 0.0612 max mem: 9377 +Train: [65] [4100/6250] eta: 0:05:57 lr: 0.000036 grad: 0.1909 (0.2097) loss: 0.6738 (0.6782) time: 0.1592 data: 0.0683 max mem: 9377 +Train: [65] [4200/6250] eta: 0:05:40 lr: 0.000036 grad: 0.1897 (0.2093) loss: 0.6607 (0.6780) time: 0.1430 data: 0.0621 max mem: 9377 +Train: [65] [4300/6250] eta: 0:05:23 lr: 0.000036 grad: 0.1926 (0.2090) loss: 0.6610 (0.6779) time: 0.1786 data: 0.0887 max mem: 9377 +Train: [65] [4400/6250] eta: 0:05:06 lr: 0.000036 grad: 0.1904 (0.2088) loss: 0.6721 (0.6777) time: 0.1878 data: 0.1092 max mem: 9377 +Train: [65] [4500/6250] eta: 0:04:50 lr: 0.000036 grad: 0.1868 (0.2085) loss: 0.6736 (0.6776) time: 0.1644 data: 0.0755 max mem: 9377 +Train: [65] [4600/6250] eta: 0:04:34 lr: 0.000036 grad: 0.1900 (0.2083) loss: 0.6618 (0.6774) time: 0.1629 data: 0.0675 max mem: 9377 +Train: [65] [4700/6250] eta: 0:04:17 lr: 0.000036 grad: 0.2080 (0.2081) loss: 0.6806 (0.6772) time: 0.1836 data: 0.0989 max mem: 9377 +Train: [65] [4800/6250] eta: 0:04:00 lr: 0.000036 grad: 0.1910 (0.2079) loss: 0.6665 (0.6772) time: 0.1415 data: 0.0422 max mem: 9377 +Train: [65] [4900/6250] eta: 0:03:44 lr: 0.000036 grad: 0.1869 (0.2076) loss: 0.6744 (0.6772) time: 0.1505 data: 0.0579 max mem: 9377 +Train: [65] [5000/6250] eta: 0:03:27 lr: 0.000036 grad: 0.1897 (0.2074) loss: 0.6747 (0.6771) time: 0.1586 data: 0.0668 max mem: 9377 +Train: [65] [5100/6250] eta: 0:03:10 lr: 0.000036 grad: 0.1953 (0.2072) loss: 0.6632 (0.6770) time: 0.1535 data: 0.0597 max mem: 9377 +Train: [65] [5200/6250] eta: 0:02:53 lr: 0.000036 grad: 0.1846 (0.2069) loss: 0.6698 (0.6769) time: 0.1782 data: 0.0866 max mem: 9377 +Train: [65] [5300/6250] eta: 0:02:37 lr: 0.000036 grad: 0.1845 (0.2068) loss: 0.6670 (0.6767) time: 0.1592 data: 0.0719 max mem: 9377 +Train: [65] [5400/6250] eta: 0:02:20 lr: 0.000036 grad: 0.1916 (0.2067) loss: 0.6631 (0.6766) time: 0.1643 data: 0.0841 max mem: 9377 +Train: [65] [5500/6250] eta: 0:02:03 lr: 0.000036 grad: 0.1919 (0.2066) loss: 0.6600 (0.6765) time: 0.1199 data: 0.0278 max mem: 9377 +Train: [65] [5600/6250] eta: 0:01:47 lr: 0.000036 grad: 0.1889 (0.2064) loss: 0.6561 (0.6764) time: 0.1582 data: 0.0649 max mem: 9377 +Train: [65] [5700/6250] eta: 0:01:30 lr: 0.000036 grad: 0.1955 (0.2062) loss: 0.6559 (0.6762) time: 0.1633 data: 0.0775 max mem: 9377 +Train: [65] [5800/6250] eta: 0:01:14 lr: 0.000036 grad: 0.1878 (0.2062) loss: 0.6719 (0.6760) time: 0.1410 data: 0.0497 max mem: 9377 +Train: [65] [5900/6250] eta: 0:00:57 lr: 0.000036 grad: 0.1887 (0.2062) loss: 0.6698 (0.6759) time: 0.1447 data: 0.0527 max mem: 9377 +Train: [65] [6000/6250] eta: 0:00:41 lr: 0.000036 grad: 0.1862 (0.2060) loss: 0.6688 (0.6758) time: 0.1480 data: 0.0501 max mem: 9377 +Train: [65] [6100/6250] eta: 0:00:24 lr: 0.000036 grad: 0.1931 (0.2060) loss: 0.6777 (0.6757) time: 0.1470 data: 0.0618 max mem: 9377 +Train: [65] [6200/6250] eta: 0:00:08 lr: 0.000036 grad: 0.1908 (0.2060) loss: 0.6741 (0.6755) time: 0.1371 data: 0.0489 max mem: 9377 +Train: [65] [6249/6250] eta: 0:00:00 lr: 0.000036 grad: 0.1901 (0.2060) loss: 0.6629 (0.6755) time: 0.1425 data: 0.0546 max mem: 9377 +Train: [65] Total time: 0:17:10 (0.1649 s / it) +Averaged stats: lr: 0.000036 grad: 0.1901 (0.2060) loss: 0.6629 (0.6755) +Eval (hcp-train-subset): [65] [ 0/62] eta: 0:04:32 loss: 0.8919 (0.8919) time: 4.3977 data: 4.3026 max mem: 9377 +Eval (hcp-train-subset): [65] [61/62] eta: 0:00:00 loss: 0.9022 (0.9024) time: 0.1479 data: 0.1209 max mem: 9377 +Eval (hcp-train-subset): [65] Total time: 0:00:15 (0.2474 s / it) +Averaged stats (hcp-train-subset): loss: 0.9022 (0.9024) +Eval (hcp-val): [65] [ 0/62] eta: 0:04:36 loss: 0.9042 (0.9042) time: 4.4676 data: 4.3781 max mem: 9377 +Eval (hcp-val): [65] [61/62] eta: 0:00:00 loss: 0.8997 (0.9031) time: 0.1344 data: 0.1076 max mem: 9377 +Eval (hcp-val): [65] Total time: 0:00:15 (0.2434 s / it) +Averaged stats (hcp-val): loss: 0.8997 (0.9031) +Eval (nsd-val): [65] [ 0/62] eta: 0:05:18 loss: 0.8901 (0.8901) time: 5.1354 data: 5.1039 max mem: 9377 +Eval (nsd-val): [65] [61/62] eta: 0:00:00 loss: 0.9003 (0.9022) time: 0.1490 data: 0.1239 max mem: 9377 +Eval (nsd-val): [65] Total time: 0:00:14 (0.2398 s / it) +Averaged stats (nsd-val): loss: 0.9003 (0.9022) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [66] [ 0/6250] eta: 10:51:09 lr: 0.000036 grad: 0.2935 (0.2935) loss: 0.7316 (0.7316) time: 6.2511 data: 6.0802 max mem: 9377 +Train: [66] [ 100/6250] eta: 0:22:29 lr: 0.000035 grad: 0.3039 (0.3581) loss: 0.6880 (0.6970) time: 0.1708 data: 0.0719 max mem: 9377 +Train: [66] [ 200/6250] eta: 0:20:22 lr: 0.000035 grad: 0.2390 (0.3148) loss: 0.6643 (0.6911) time: 0.1802 data: 0.0715 max mem: 9377 +Train: [66] [ 300/6250] eta: 0:19:21 lr: 0.000035 grad: 0.2394 (0.2928) loss: 0.6553 (0.6846) time: 0.1967 data: 0.0963 max mem: 9377 +Train: [66] [ 400/6250] eta: 0:18:40 lr: 0.000035 grad: 0.2213 (0.2812) loss: 0.6937 (0.6855) time: 0.1903 data: 0.0891 max mem: 9377 +Train: [66] [ 500/6250] eta: 0:18:00 lr: 0.000035 grad: 0.2402 (0.2717) loss: 0.6814 (0.6850) time: 0.1842 data: 0.0859 max mem: 9377 +Train: [66] [ 600/6250] eta: 0:17:18 lr: 0.000035 grad: 0.2145 (0.2676) loss: 0.6797 (0.6834) time: 0.1499 data: 0.0496 max mem: 9377 +Train: [66] [ 700/6250] eta: 0:16:46 lr: 0.000035 grad: 0.2132 (0.2607) loss: 0.6752 (0.6821) time: 0.1459 data: 0.0665 max mem: 9377 +Train: [66] [ 800/6250] eta: 0:16:29 lr: 0.000035 grad: 0.2033 (0.2549) loss: 0.6657 (0.6806) time: 0.2224 data: 0.1338 max mem: 9377 +Train: [66] [ 900/6250] eta: 0:16:01 lr: 0.000035 grad: 0.1949 (0.2500) loss: 0.6719 (0.6791) time: 0.1735 data: 0.0884 max mem: 9377 +Train: [66] [1000/6250] eta: 0:15:34 lr: 0.000035 grad: 0.1884 (0.2454) loss: 0.6614 (0.6783) time: 0.1609 data: 0.0662 max mem: 9377 +Train: [66] [1100/6250] eta: 0:15:06 lr: 0.000035 grad: 0.1900 (0.2411) loss: 0.6748 (0.6777) time: 0.1574 data: 0.0615 max mem: 9377 +Train: [66] [1200/6250] eta: 0:14:43 lr: 0.000035 grad: 0.1885 (0.2376) loss: 0.6597 (0.6766) time: 0.1722 data: 0.0844 max mem: 9377 +Train: [66] [1300/6250] eta: 0:14:19 lr: 0.000035 grad: 0.1928 (0.2341) loss: 0.6592 (0.6758) time: 0.1583 data: 0.0614 max mem: 9377 +Train: [66] [1400/6250] eta: 0:13:55 lr: 0.000035 grad: 0.1912 (0.2313) loss: 0.6687 (0.6752) time: 0.1457 data: 0.0624 max mem: 9377 +Train: [66] [1500/6250] eta: 0:13:32 lr: 0.000035 grad: 0.1846 (0.2289) loss: 0.6648 (0.6750) time: 0.1625 data: 0.0652 max mem: 9377 +Train: [66] [1600/6250] eta: 0:13:07 lr: 0.000035 grad: 0.1848 (0.2268) loss: 0.6744 (0.6752) time: 0.1389 data: 0.0515 max mem: 9377 +Train: [66] [1700/6250] eta: 0:12:44 lr: 0.000035 grad: 0.1913 (0.2249) loss: 0.6645 (0.6749) time: 0.1330 data: 0.0436 max mem: 9377 +Train: [66] [1800/6250] eta: 0:12:24 lr: 0.000035 grad: 0.1916 (0.2232) loss: 0.6673 (0.6750) time: 0.1154 data: 0.0150 max mem: 9377 +Train: [66] [1900/6250] eta: 0:12:07 lr: 0.000035 grad: 0.1857 (0.2216) loss: 0.6739 (0.6749) time: 0.1600 data: 0.0778 max mem: 9377 +Train: [66] [2000/6250] eta: 0:11:49 lr: 0.000035 grad: 0.1875 (0.2201) loss: 0.6867 (0.6750) time: 0.1563 data: 0.0714 max mem: 9377 +Train: [66] [2100/6250] eta: 0:11:30 lr: 0.000035 grad: 0.1836 (0.2187) loss: 0.6718 (0.6749) time: 0.1414 data: 0.0491 max mem: 9377 +Train: [66] [2200/6250] eta: 0:11:14 lr: 0.000035 grad: 0.1851 (0.2173) loss: 0.6739 (0.6751) time: 0.1732 data: 0.0953 max mem: 9377 +Train: [66] [2300/6250] eta: 0:10:57 lr: 0.000035 grad: 0.1819 (0.2164) loss: 0.6821 (0.6753) time: 0.1842 data: 0.1112 max mem: 9377 +Train: [66] [2400/6250] eta: 0:10:40 lr: 0.000035 grad: 0.1868 (0.2155) loss: 0.6722 (0.6753) time: 0.1758 data: 0.0943 max mem: 9377 +Train: [66] [2500/6250] eta: 0:10:23 lr: 0.000035 grad: 0.1868 (0.2147) loss: 0.6654 (0.6752) time: 0.1580 data: 0.0675 max mem: 9377 +Train: [66] [2600/6250] eta: 0:10:07 lr: 0.000035 grad: 0.1940 (0.2140) loss: 0.6772 (0.6750) time: 0.1686 data: 0.0834 max mem: 9377 +Train: [66] [2700/6250] eta: 0:09:49 lr: 0.000035 grad: 0.1887 (0.2132) loss: 0.6779 (0.6750) time: 0.1705 data: 0.0859 max mem: 9377 +Train: [66] [2800/6250] eta: 0:09:32 lr: 0.000035 grad: 0.1932 (0.2127) loss: 0.6675 (0.6746) time: 0.1478 data: 0.0573 max mem: 9377 +Train: [66] [2900/6250] eta: 0:09:14 lr: 0.000035 grad: 0.1841 (0.2120) loss: 0.6779 (0.6745) time: 0.1445 data: 0.0501 max mem: 9377 +Train: [66] [3000/6250] eta: 0:08:56 lr: 0.000035 grad: 0.1820 (0.2114) loss: 0.6655 (0.6744) time: 0.1597 data: 0.0724 max mem: 9377 +Train: [66] [3100/6250] eta: 0:08:38 lr: 0.000035 grad: 0.1856 (0.2110) loss: 0.6720 (0.6742) time: 0.1643 data: 0.0843 max mem: 9377 +Train: [66] [3200/6250] eta: 0:08:20 lr: 0.000035 grad: 0.1933 (0.2106) loss: 0.6677 (0.6740) time: 0.1434 data: 0.0511 max mem: 9377 +Train: [66] [3300/6250] eta: 0:08:01 lr: 0.000035 grad: 0.1904 (0.2102) loss: 0.6644 (0.6738) time: 0.1115 data: 0.0167 max mem: 9377 +Train: [66] [3400/6250] eta: 0:07:44 lr: 0.000035 grad: 0.1934 (0.2098) loss: 0.6861 (0.6738) time: 0.1728 data: 0.0849 max mem: 9377 +Train: [66] [3500/6250] eta: 0:07:28 lr: 0.000034 grad: 0.1869 (0.2094) loss: 0.6774 (0.6739) time: 0.1594 data: 0.0688 max mem: 9377 +Train: [66] [3600/6250] eta: 0:07:12 lr: 0.000034 grad: 0.1887 (0.2090) loss: 0.6637 (0.6740) time: 0.1474 data: 0.0608 max mem: 9377 +Train: [66] [3700/6250] eta: 0:06:55 lr: 0.000034 grad: 0.1817 (0.2086) loss: 0.6779 (0.6742) time: 0.1709 data: 0.0929 max mem: 9377 +Train: [66] [3800/6250] eta: 0:06:39 lr: 0.000034 grad: 0.1874 (0.2083) loss: 0.6804 (0.6744) time: 0.1626 data: 0.0789 max mem: 9377 +Train: [66] [3900/6250] eta: 0:06:22 lr: 0.000034 grad: 0.1872 (0.2080) loss: 0.6840 (0.6745) time: 0.1386 data: 0.0444 max mem: 9377 +Train: [66] [4000/6250] eta: 0:06:06 lr: 0.000034 grad: 0.1861 (0.2076) loss: 0.6616 (0.6746) time: 0.1737 data: 0.0989 max mem: 9377 +Train: [66] [4100/6250] eta: 0:05:50 lr: 0.000034 grad: 0.1942 (0.2074) loss: 0.6694 (0.6746) time: 0.1305 data: 0.0398 max mem: 9377 +Train: [66] [4200/6250] eta: 0:05:34 lr: 0.000034 grad: 0.1894 (0.2072) loss: 0.6775 (0.6746) time: 0.1615 data: 0.0749 max mem: 9377 +Train: [66] [4300/6250] eta: 0:05:18 lr: 0.000034 grad: 0.1882 (0.2070) loss: 0.6666 (0.6746) time: 0.1688 data: 0.0699 max mem: 9377 +Train: [66] [4400/6250] eta: 0:05:02 lr: 0.000034 grad: 0.1874 (0.2067) loss: 0.6612 (0.6746) time: 0.1673 data: 0.0810 max mem: 9377 +Train: [66] [4500/6250] eta: 0:04:46 lr: 0.000034 grad: 0.1812 (0.2065) loss: 0.6756 (0.6747) time: 0.1662 data: 0.0701 max mem: 9377 +Train: [66] [4600/6250] eta: 0:04:29 lr: 0.000034 grad: 0.1856 (0.2063) loss: 0.6831 (0.6747) time: 0.2086 data: 0.1243 max mem: 9377 +Train: [66] [4700/6250] eta: 0:04:13 lr: 0.000034 grad: 0.1835 (0.2060) loss: 0.6788 (0.6749) time: 0.1611 data: 0.0754 max mem: 9377 +Train: [66] [4800/6250] eta: 0:03:56 lr: 0.000034 grad: 0.1807 (0.2058) loss: 0.6819 (0.6750) time: 0.1565 data: 0.0690 max mem: 9377 +Train: [66] [4900/6250] eta: 0:03:40 lr: 0.000034 grad: 0.1834 (0.2054) loss: 0.6770 (0.6751) time: 0.1557 data: 0.0598 max mem: 9377 +Train: [66] [5000/6250] eta: 0:03:23 lr: 0.000034 grad: 0.1908 (0.2052) loss: 0.6657 (0.6751) time: 0.1551 data: 0.0623 max mem: 9377 +Train: [66] [5100/6250] eta: 0:03:07 lr: 0.000034 grad: 0.1878 (0.2051) loss: 0.6849 (0.6751) time: 0.1777 data: 0.0911 max mem: 9377 +Train: [66] [5200/6250] eta: 0:02:50 lr: 0.000034 grad: 0.1934 (0.2049) loss: 0.6698 (0.6751) time: 0.1405 data: 0.0435 max mem: 9377 +Train: [66] [5300/6250] eta: 0:02:34 lr: 0.000034 grad: 0.1906 (0.2048) loss: 0.6750 (0.6751) time: 0.1363 data: 0.0501 max mem: 9377 +Train: [66] [5400/6250] eta: 0:02:18 lr: 0.000034 grad: 0.1854 (0.2046) loss: 0.6738 (0.6751) time: 0.1720 data: 0.0883 max mem: 9377 +Train: [66] [5500/6250] eta: 0:02:02 lr: 0.000034 grad: 0.1905 (0.2045) loss: 0.6557 (0.6750) time: 0.2190 data: 0.1334 max mem: 9377 +Train: [66] [5600/6250] eta: 0:01:45 lr: 0.000034 grad: 0.1853 (0.2043) loss: 0.6827 (0.6749) time: 0.1455 data: 0.0660 max mem: 9377 +Train: [66] [5700/6250] eta: 0:01:29 lr: 0.000034 grad: 0.1883 (0.2042) loss: 0.6758 (0.6750) time: 0.1580 data: 0.0677 max mem: 9377 +Train: [66] [5800/6250] eta: 0:01:12 lr: 0.000034 grad: 0.1869 (0.2041) loss: 0.6798 (0.6750) time: 0.1490 data: 0.0619 max mem: 9377 +Train: [66] [5900/6250] eta: 0:00:56 lr: 0.000034 grad: 0.1907 (0.2040) loss: 0.6728 (0.6750) time: 0.1418 data: 0.0521 max mem: 9377 +Train: [66] [6000/6250] eta: 0:00:40 lr: 0.000034 grad: 0.1815 (0.2038) loss: 0.6727 (0.6751) time: 0.1533 data: 0.0635 max mem: 9377 +Train: [66] [6100/6250] eta: 0:00:24 lr: 0.000034 grad: 0.1974 (0.2037) loss: 0.6586 (0.6751) time: 0.1418 data: 0.0506 max mem: 9377 +Train: [66] [6200/6250] eta: 0:00:08 lr: 0.000034 grad: 0.1983 (0.2038) loss: 0.6684 (0.6751) time: 0.1771 data: 0.0925 max mem: 9377 +Train: [66] [6249/6250] eta: 0:00:00 lr: 0.000034 grad: 0.1907 (0.2037) loss: 0.6728 (0.6750) time: 0.1685 data: 0.0839 max mem: 9377 +Train: [66] Total time: 0:16:57 (0.1628 s / it) +Averaged stats: lr: 0.000034 grad: 0.1907 (0.2037) loss: 0.6728 (0.6750) +Eval (hcp-train-subset): [66] [ 0/62] eta: 0:05:35 loss: 0.8952 (0.8952) time: 5.4073 data: 5.3779 max mem: 9377 +Eval (hcp-train-subset): [66] [61/62] eta: 0:00:00 loss: 0.9028 (0.9025) time: 0.1623 data: 0.1352 max mem: 9377 +Eval (hcp-train-subset): [66] Total time: 0:00:15 (0.2510 s / it) +Averaged stats (hcp-train-subset): loss: 0.9028 (0.9025) +Eval (hcp-val): [66] [ 0/62] eta: 0:04:09 loss: 0.9053 (0.9053) time: 4.0173 data: 3.9561 max mem: 9377 +Eval (hcp-val): [66] [61/62] eta: 0:00:00 loss: 0.9003 (0.9022) time: 0.1455 data: 0.1186 max mem: 9377 +Eval (hcp-val): [66] Total time: 0:00:15 (0.2471 s / it) +Averaged stats (hcp-val): loss: 0.9003 (0.9022) +Eval (nsd-val): [66] [ 0/62] eta: 0:06:03 loss: 0.8985 (0.8985) time: 5.8691 data: 5.8383 max mem: 9377 +Eval (nsd-val): [66] [61/62] eta: 0:00:00 loss: 0.9053 (0.9064) time: 0.1404 data: 0.1129 max mem: 9377 +Eval (nsd-val): [66] Total time: 0:00:15 (0.2554 s / it) +Averaged stats (nsd-val): loss: 0.9053 (0.9064) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [67] [ 0/6250] eta: 8:40:22 lr: 0.000034 grad: 0.3431 (0.3431) loss: 0.6383 (0.6383) time: 4.9956 data: 4.8447 max mem: 9377 +Train: [67] [ 100/6250] eta: 0:23:11 lr: 0.000034 grad: 0.3159 (0.3516) loss: 0.6803 (0.6965) time: 0.1598 data: 0.0441 max mem: 9377 +Train: [67] [ 200/6250] eta: 0:20:05 lr: 0.000034 grad: 0.3023 (0.3492) loss: 0.6878 (0.6872) time: 0.1788 data: 0.0865 max mem: 9377 +Train: [67] [ 300/6250] eta: 0:18:39 lr: 0.000034 grad: 0.2511 (0.3236) loss: 0.6840 (0.6833) time: 0.1901 data: 0.0890 max mem: 9377 +Train: [67] [ 400/6250] eta: 0:17:31 lr: 0.000034 grad: 0.2447 (0.3062) loss: 0.6644 (0.6815) time: 0.1171 data: 0.0302 max mem: 9377 +Train: [67] [ 500/6250] eta: 0:16:44 lr: 0.000034 grad: 0.2268 (0.2930) loss: 0.6631 (0.6819) time: 0.1472 data: 0.0530 max mem: 9377 +Train: [67] [ 600/6250] eta: 0:16:06 lr: 0.000033 grad: 0.2125 (0.2813) loss: 0.6699 (0.6803) time: 0.1526 data: 0.0568 max mem: 9377 +Train: [67] [ 700/6250] eta: 0:15:37 lr: 0.000033 grad: 0.2002 (0.2711) loss: 0.6670 (0.6800) time: 0.1573 data: 0.0622 max mem: 9377 +Train: [67] [ 800/6250] eta: 0:15:25 lr: 0.000033 grad: 0.1979 (0.2631) loss: 0.6715 (0.6796) time: 0.1620 data: 0.0622 max mem: 9377 +Train: [67] [ 900/6250] eta: 0:15:03 lr: 0.000033 grad: 0.1971 (0.2561) loss: 0.6755 (0.6798) time: 0.1641 data: 0.0698 max mem: 9377 +Train: [67] [1000/6250] eta: 0:14:39 lr: 0.000033 grad: 0.1941 (0.2503) loss: 0.6667 (0.6799) time: 0.1378 data: 0.0366 max mem: 9377 +Train: [67] [1100/6250] eta: 0:14:22 lr: 0.000033 grad: 0.1937 (0.2471) loss: 0.6756 (0.6795) time: 0.1708 data: 0.0854 max mem: 9377 +Train: [67] [1200/6250] eta: 0:14:09 lr: 0.000033 grad: 0.1898 (0.2442) loss: 0.6630 (0.6788) time: 0.1823 data: 0.0890 max mem: 9377 +Train: [67] [1300/6250] eta: 0:13:53 lr: 0.000033 grad: 0.1938 (0.2408) loss: 0.6670 (0.6780) time: 0.1959 data: 0.1071 max mem: 9377 +Train: [67] [1400/6250] eta: 0:13:34 lr: 0.000033 grad: 0.1907 (0.2378) loss: 0.6593 (0.6772) time: 0.1752 data: 0.0859 max mem: 9377 +Train: [67] [1500/6250] eta: 0:13:13 lr: 0.000033 grad: 0.2045 (0.2364) loss: 0.6652 (0.6765) time: 0.1475 data: 0.0577 max mem: 9377 +Train: [67] [1600/6250] eta: 0:12:55 lr: 0.000033 grad: 0.2047 (0.2351) loss: 0.6696 (0.6761) time: 0.1822 data: 0.0978 max mem: 9377 +Train: [67] [1700/6250] eta: 0:12:36 lr: 0.000033 grad: 0.1988 (0.2345) loss: 0.6500 (0.6753) time: 0.1642 data: 0.0716 max mem: 9377 +Train: [67] [1800/6250] eta: 0:12:21 lr: 0.000033 grad: 0.2051 (0.2338) loss: 0.6628 (0.6747) time: 0.1714 data: 0.0839 max mem: 9377 +Train: [67] [1900/6250] eta: 0:12:03 lr: 0.000033 grad: 0.1997 (0.2323) loss: 0.6468 (0.6740) time: 0.1445 data: 0.0634 max mem: 9377 +Train: [67] [2000/6250] eta: 0:11:47 lr: 0.000033 grad: 0.1986 (0.2309) loss: 0.6536 (0.6737) time: 0.1666 data: 0.0837 max mem: 9377 +Train: [67] [2100/6250] eta: 0:11:31 lr: 0.000033 grad: 0.1958 (0.2297) loss: 0.6713 (0.6735) time: 0.1892 data: 0.1066 max mem: 9377 +Train: [67] [2200/6250] eta: 0:11:15 lr: 0.000033 grad: 0.1888 (0.2285) loss: 0.6682 (0.6734) time: 0.1792 data: 0.0928 max mem: 9377 +Train: [67] [2300/6250] eta: 0:10:58 lr: 0.000033 grad: 0.1808 (0.2271) loss: 0.6731 (0.6732) time: 0.1909 data: 0.1034 max mem: 9377 +Train: [67] [2400/6250] eta: 0:10:40 lr: 0.000033 grad: 0.1906 (0.2260) loss: 0.6734 (0.6732) time: 0.1608 data: 0.0799 max mem: 9377 +Train: [67] [2500/6250] eta: 0:10:20 lr: 0.000033 grad: 0.1932 (0.2248) loss: 0.6627 (0.6732) time: 0.1538 data: 0.0656 max mem: 9377 +Train: [67] [2600/6250] eta: 0:10:05 lr: 0.000033 grad: 0.1890 (0.2238) loss: 0.6740 (0.6731) time: 0.1687 data: 0.0754 max mem: 9377 +Train: [67] [2700/6250] eta: 0:09:47 lr: 0.000033 grad: 0.1873 (0.2228) loss: 0.6902 (0.6731) time: 0.1675 data: 0.0738 max mem: 9377 +Train: [67] [2800/6250] eta: 0:09:31 lr: 0.000033 grad: 0.1953 (0.2219) loss: 0.6608 (0.6730) time: 0.1722 data: 0.0736 max mem: 9377 +Train: [67] [2900/6250] eta: 0:09:13 lr: 0.000033 grad: 0.1900 (0.2209) loss: 0.6653 (0.6730) time: 0.1597 data: 0.0707 max mem: 9377 +Train: [67] [3000/6250] eta: 0:08:56 lr: 0.000033 grad: 0.1858 (0.2201) loss: 0.6673 (0.6728) time: 0.1717 data: 0.0811 max mem: 9377 +Train: [67] [3100/6250] eta: 0:08:38 lr: 0.000033 grad: 0.1906 (0.2193) loss: 0.6762 (0.6727) time: 0.1531 data: 0.0671 max mem: 9377 +Train: [67] [3200/6250] eta: 0:08:20 lr: 0.000033 grad: 0.1921 (0.2187) loss: 0.6662 (0.6726) time: 0.1663 data: 0.0732 max mem: 9377 +Train: [67] [3300/6250] eta: 0:08:03 lr: 0.000033 grad: 0.1911 (0.2182) loss: 0.6847 (0.6726) time: 0.1405 data: 0.0497 max mem: 9377 +Train: [67] [3400/6250] eta: 0:07:47 lr: 0.000033 grad: 0.1879 (0.2177) loss: 0.6716 (0.6726) time: 0.2037 data: 0.1254 max mem: 9377 +Train: [67] [3500/6250] eta: 0:07:29 lr: 0.000033 grad: 0.1868 (0.2171) loss: 0.6765 (0.6728) time: 0.1572 data: 0.0697 max mem: 9377 +Train: [67] [3600/6250] eta: 0:07:13 lr: 0.000033 grad: 0.2006 (0.2167) loss: 0.6774 (0.6728) time: 0.1794 data: 0.0922 max mem: 9377 +Train: [67] [3700/6250] eta: 0:06:56 lr: 0.000033 grad: 0.1879 (0.2162) loss: 0.6799 (0.6729) time: 0.1484 data: 0.0485 max mem: 9377 +Train: [67] [3800/6250] eta: 0:06:39 lr: 0.000033 grad: 0.1903 (0.2158) loss: 0.6745 (0.6728) time: 0.1552 data: 0.0643 max mem: 9377 +Train: [67] [3900/6250] eta: 0:06:22 lr: 0.000033 grad: 0.1924 (0.2153) loss: 0.6876 (0.6728) time: 0.1562 data: 0.0607 max mem: 9377 +Train: [67] [4000/6250] eta: 0:06:06 lr: 0.000032 grad: 0.1914 (0.2149) loss: 0.6660 (0.6729) time: 0.1801 data: 0.0909 max mem: 9377 +Train: [67] [4100/6250] eta: 0:05:50 lr: 0.000032 grad: 0.1902 (0.2145) loss: 0.6829 (0.6729) time: 0.1904 data: 0.1033 max mem: 9377 +Train: [67] [4200/6250] eta: 0:05:34 lr: 0.000032 grad: 0.1945 (0.2141) loss: 0.6639 (0.6727) time: 0.1643 data: 0.0726 max mem: 9377 +Train: [67] [4300/6250] eta: 0:05:17 lr: 0.000032 grad: 0.1902 (0.2139) loss: 0.6644 (0.6727) time: 0.1332 data: 0.0491 max mem: 9377 +Train: [67] [4400/6250] eta: 0:05:01 lr: 0.000032 grad: 0.1922 (0.2136) loss: 0.6736 (0.6725) time: 0.1756 data: 0.0851 max mem: 9377 +Train: [67] [4500/6250] eta: 0:04:45 lr: 0.000032 grad: 0.1935 (0.2132) loss: 0.6621 (0.6723) time: 0.1765 data: 0.0871 max mem: 9377 +Train: [67] [4600/6250] eta: 0:04:28 lr: 0.000032 grad: 0.1909 (0.2132) loss: 0.6632 (0.6720) time: 0.1413 data: 0.0390 max mem: 9377 +Train: [67] [4700/6250] eta: 0:04:12 lr: 0.000032 grad: 0.1968 (0.2128) loss: 0.6612 (0.6719) time: 0.1723 data: 0.0842 max mem: 9377 +Train: [67] [4800/6250] eta: 0:03:55 lr: 0.000032 grad: 0.1935 (0.2127) loss: 0.6687 (0.6718) time: 0.1434 data: 0.0561 max mem: 9377 +Train: [67] [4900/6250] eta: 0:03:39 lr: 0.000032 grad: 0.1922 (0.2128) loss: 0.6756 (0.6718) time: 0.1664 data: 0.0834 max mem: 9377 +Train: [67] [5000/6250] eta: 0:03:22 lr: 0.000032 grad: 0.1891 (0.2125) loss: 0.6807 (0.6719) time: 0.1531 data: 0.0687 max mem: 9377 +Train: [67] [5100/6250] eta: 0:03:05 lr: 0.000032 grad: 0.1995 (0.2123) loss: 0.6672 (0.6719) time: 0.1446 data: 0.0525 max mem: 9377 +Train: [67] [5200/6250] eta: 0:02:49 lr: 0.000032 grad: 0.1913 (0.2121) loss: 0.6708 (0.6720) time: 0.1236 data: 0.0337 max mem: 9377 +Train: [67] [5300/6250] eta: 0:02:33 lr: 0.000032 grad: 0.1876 (0.2120) loss: 0.6840 (0.6720) time: 0.1545 data: 0.0609 max mem: 9377 +Train: [67] [5400/6250] eta: 0:02:17 lr: 0.000032 grad: 0.1903 (0.2118) loss: 0.6730 (0.6721) time: 0.1700 data: 0.0791 max mem: 9377 +Train: [67] [5500/6250] eta: 0:02:00 lr: 0.000032 grad: 0.1956 (0.2116) loss: 0.6681 (0.6721) time: 0.1626 data: 0.0684 max mem: 9377 +Train: [67] [5600/6250] eta: 0:01:44 lr: 0.000032 grad: 0.1982 (0.2118) loss: 0.6750 (0.6721) time: 0.1375 data: 0.0514 max mem: 9377 +Train: [67] [5700/6250] eta: 0:01:28 lr: 0.000032 grad: 0.1981 (0.2117) loss: 0.6600 (0.6720) time: 0.1856 data: 0.0954 max mem: 9377 +Train: [67] [5800/6250] eta: 0:01:12 lr: 0.000032 grad: 0.1893 (0.2115) loss: 0.6679 (0.6719) time: 0.1490 data: 0.0570 max mem: 9377 +Train: [67] [5900/6250] eta: 0:00:56 lr: 0.000032 grad: 0.1990 (0.2113) loss: 0.6651 (0.6718) time: 0.1434 data: 0.0478 max mem: 9377 +Train: [67] [6000/6250] eta: 0:00:40 lr: 0.000032 grad: 0.1911 (0.2111) loss: 0.6682 (0.6717) time: 0.1223 data: 0.0313 max mem: 9377 +Train: [67] [6100/6250] eta: 0:00:24 lr: 0.000032 grad: 0.1853 (0.2110) loss: 0.6661 (0.6716) time: 0.1620 data: 0.0784 max mem: 9377 +Train: [67] [6200/6250] eta: 0:00:08 lr: 0.000032 grad: 0.1909 (0.2108) loss: 0.6578 (0.6714) time: 0.1656 data: 0.0762 max mem: 9377 +Train: [67] [6249/6250] eta: 0:00:00 lr: 0.000032 grad: 0.1880 (0.2107) loss: 0.6650 (0.6713) time: 0.1687 data: 0.0768 max mem: 9377 +Train: [67] Total time: 0:16:49 (0.1615 s / it) +Averaged stats: lr: 0.000032 grad: 0.1880 (0.2107) loss: 0.6650 (0.6713) +Eval (hcp-train-subset): [67] [ 0/62] eta: 0:04:58 loss: 0.8912 (0.8912) time: 4.8116 data: 4.7268 max mem: 9377 +Eval (hcp-train-subset): [67] [61/62] eta: 0:00:00 loss: 0.9002 (0.9037) time: 0.1510 data: 0.1257 max mem: 9377 +Eval (hcp-train-subset): [67] Total time: 0:00:14 (0.2384 s / it) +Averaged stats (hcp-train-subset): loss: 0.9002 (0.9037) +Eval (hcp-val): [67] [ 0/62] eta: 0:05:36 loss: 0.9072 (0.9072) time: 5.4240 data: 5.3935 max mem: 9377 +Eval (hcp-val): [67] [61/62] eta: 0:00:00 loss: 0.9035 (0.9049) time: 0.1389 data: 0.1123 max mem: 9377 +Eval (hcp-val): [67] Total time: 0:00:14 (0.2384 s / it) +Averaged stats (hcp-val): loss: 0.9035 (0.9049) +Eval (nsd-val): [67] [ 0/62] eta: 0:05:33 loss: 0.9048 (0.9048) time: 5.3740 data: 5.3435 max mem: 9377 +Eval (nsd-val): [67] [61/62] eta: 0:00:00 loss: 0.9027 (0.9048) time: 0.1500 data: 0.1242 max mem: 9377 +Eval (nsd-val): [67] Total time: 0:00:14 (0.2346 s / it) +Averaged stats (nsd-val): loss: 0.9027 (0.9048) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [68] [ 0/6250] eta: 8:39:33 lr: 0.000032 grad: 0.1996 (0.1996) loss: 0.7725 (0.7725) time: 4.9878 data: 4.7398 max mem: 9377 +Train: [68] [ 100/6250] eta: 0:22:27 lr: 0.000032 grad: 0.2803 (0.3066) loss: 0.7013 (0.7057) time: 0.1760 data: 0.0744 max mem: 9377 +Train: [68] [ 200/6250] eta: 0:19:42 lr: 0.000032 grad: 0.2846 (0.3064) loss: 0.6666 (0.6947) time: 0.1491 data: 0.0433 max mem: 9377 +Train: [68] [ 300/6250] eta: 0:18:20 lr: 0.000032 grad: 0.3079 (0.3007) loss: 0.6804 (0.6918) time: 0.1552 data: 0.0520 max mem: 9377 +Train: [68] [ 400/6250] eta: 0:17:52 lr: 0.000032 grad: 0.2526 (0.2916) loss: 0.6822 (0.6892) time: 0.1893 data: 0.0911 max mem: 9377 +Train: [68] [ 500/6250] eta: 0:17:05 lr: 0.000032 grad: 0.2202 (0.2820) loss: 0.6726 (0.6872) time: 0.1509 data: 0.0554 max mem: 9377 +Train: [68] [ 600/6250] eta: 0:16:35 lr: 0.000032 grad: 0.2034 (0.2723) loss: 0.6824 (0.6847) time: 0.1677 data: 0.0758 max mem: 9377 +Train: [68] [ 700/6250] eta: 0:16:03 lr: 0.000032 grad: 0.1992 (0.2632) loss: 0.6718 (0.6843) time: 0.1511 data: 0.0470 max mem: 9377 +Train: [68] [ 800/6250] eta: 0:15:51 lr: 0.000032 grad: 0.1941 (0.2583) loss: 0.6600 (0.6824) time: 0.1832 data: 0.0864 max mem: 9377 +Train: [68] [ 900/6250] eta: 0:15:29 lr: 0.000032 grad: 0.1966 (0.2532) loss: 0.6615 (0.6807) time: 0.1551 data: 0.0631 max mem: 9377 +Train: [68] [1000/6250] eta: 0:15:08 lr: 0.000032 grad: 0.1842 (0.2494) loss: 0.6794 (0.6799) time: 0.1669 data: 0.0714 max mem: 9377 +Train: [68] [1100/6250] eta: 0:14:43 lr: 0.000032 grad: 0.1950 (0.2448) loss: 0.6782 (0.6788) time: 0.1577 data: 0.0742 max mem: 9377 +Train: [68] [1200/6250] eta: 0:14:22 lr: 0.000032 grad: 0.1852 (0.2409) loss: 0.6746 (0.6784) time: 0.1594 data: 0.0618 max mem: 9377 +Train: [68] [1300/6250] eta: 0:14:00 lr: 0.000031 grad: 0.1891 (0.2372) loss: 0.6736 (0.6779) time: 0.1472 data: 0.0496 max mem: 9377 +Train: [68] [1400/6250] eta: 0:13:36 lr: 0.000031 grad: 0.1828 (0.2352) loss: 0.6778 (0.6775) time: 0.1392 data: 0.0429 max mem: 9377 +Train: [68] [1500/6250] eta: 0:13:15 lr: 0.000031 grad: 0.1918 (0.2330) loss: 0.6770 (0.6772) time: 0.1723 data: 0.0869 max mem: 9377 +Train: [68] [1600/6250] eta: 0:12:54 lr: 0.000031 grad: 0.1938 (0.2309) loss: 0.6634 (0.6769) time: 0.1615 data: 0.0622 max mem: 9377 +Train: [68] [1700/6250] eta: 0:12:33 lr: 0.000031 grad: 0.1890 (0.2287) loss: 0.6827 (0.6768) time: 0.1587 data: 0.0539 max mem: 9377 +Train: [68] [1800/6250] eta: 0:12:12 lr: 0.000031 grad: 0.1908 (0.2271) loss: 0.6745 (0.6767) time: 0.1807 data: 0.0900 max mem: 9377 +Train: [68] [1900/6250] eta: 0:11:52 lr: 0.000031 grad: 0.1903 (0.2256) loss: 0.6696 (0.6763) time: 0.1611 data: 0.0764 max mem: 9377 +Train: [68] [2000/6250] eta: 0:11:35 lr: 0.000031 grad: 0.1853 (0.2241) loss: 0.6714 (0.6761) time: 0.1764 data: 0.0885 max mem: 9377 +Train: [68] [2100/6250] eta: 0:11:19 lr: 0.000031 grad: 0.1981 (0.2233) loss: 0.6778 (0.6759) time: 0.1480 data: 0.0657 max mem: 9377 +Train: [68] [2200/6250] eta: 0:11:04 lr: 0.000031 grad: 0.1965 (0.2223) loss: 0.6709 (0.6757) time: 0.1880 data: 0.1123 max mem: 9377 +Train: [68] [2300/6250] eta: 0:10:50 lr: 0.000031 grad: 0.1981 (0.2217) loss: 0.6700 (0.6754) time: 0.2128 data: 0.1327 max mem: 9377 +Train: [68] [2400/6250] eta: 0:10:37 lr: 0.000031 grad: 0.1915 (0.2207) loss: 0.6650 (0.6753) time: 0.2060 data: 0.1158 max mem: 9377 +Train: [68] [2500/6250] eta: 0:10:23 lr: 0.000031 grad: 0.1989 (0.2201) loss: 0.6785 (0.6756) time: 0.2044 data: 0.1211 max mem: 9377 +Train: [68] [2600/6250] eta: 0:10:08 lr: 0.000031 grad: 0.1827 (0.2193) loss: 0.6866 (0.6759) time: 0.1727 data: 0.0804 max mem: 9377 +Train: [68] [2700/6250] eta: 0:09:51 lr: 0.000031 grad: 0.1937 (0.2188) loss: 0.6720 (0.6761) time: 0.1459 data: 0.0561 max mem: 9377 +Train: [68] [2800/6250] eta: 0:09:36 lr: 0.000031 grad: 0.1830 (0.2180) loss: 0.6855 (0.6763) time: 0.1908 data: 0.0993 max mem: 9377 +Train: [68] [2900/6250] eta: 0:09:20 lr: 0.000031 grad: 0.1868 (0.2174) loss: 0.6791 (0.6764) time: 0.1912 data: 0.1040 max mem: 9377 +Train: [68] [3000/6250] eta: 0:09:03 lr: 0.000031 grad: 0.1878 (0.2169) loss: 0.6829 (0.6765) time: 0.1592 data: 0.0588 max mem: 9377 +Train: [68] [3100/6250] eta: 0:08:46 lr: 0.000031 grad: 0.1903 (0.2161) loss: 0.6738 (0.6766) time: 0.1681 data: 0.0790 max mem: 9377 +Train: [68] [3200/6250] eta: 0:08:28 lr: 0.000031 grad: 0.1887 (0.2156) loss: 0.6763 (0.6768) time: 0.1446 data: 0.0582 max mem: 9377 +Train: [68] [3300/6250] eta: 0:08:10 lr: 0.000031 grad: 0.1961 (0.2152) loss: 0.6821 (0.6769) time: 0.1327 data: 0.0369 max mem: 9377 +Train: [68] [3400/6250] eta: 0:07:53 lr: 0.000031 grad: 0.1958 (0.2147) loss: 0.6678 (0.6771) time: 0.1655 data: 0.0745 max mem: 9377 +Train: [68] [3500/6250] eta: 0:07:35 lr: 0.000031 grad: 0.1869 (0.2141) loss: 0.6899 (0.6772) time: 0.1631 data: 0.0822 max mem: 9377 +Train: [68] [3600/6250] eta: 0:07:18 lr: 0.000031 grad: 0.1894 (0.2137) loss: 0.6788 (0.6772) time: 0.1641 data: 0.0725 max mem: 9377 +Train: [68] [3700/6250] eta: 0:07:01 lr: 0.000031 grad: 0.1868 (0.2132) loss: 0.6848 (0.6773) time: 0.1485 data: 0.0513 max mem: 9377 +Train: [68] [3800/6250] eta: 0:06:44 lr: 0.000031 grad: 0.1944 (0.2128) loss: 0.6681 (0.6773) time: 0.1784 data: 0.0822 max mem: 9377 +Train: [68] [3900/6250] eta: 0:06:27 lr: 0.000031 grad: 0.1943 (0.2125) loss: 0.6742 (0.6772) time: 0.1454 data: 0.0605 max mem: 9377 +Train: [68] [4000/6250] eta: 0:06:10 lr: 0.000031 grad: 0.1960 (0.2123) loss: 0.6700 (0.6771) time: 0.1767 data: 0.0974 max mem: 9377 +Train: [68] [4100/6250] eta: 0:05:54 lr: 0.000031 grad: 0.1938 (0.2122) loss: 0.6717 (0.6771) time: 0.1344 data: 0.0440 max mem: 9377 +Train: [68] [4200/6250] eta: 0:05:37 lr: 0.000031 grad: 0.1865 (0.2121) loss: 0.6840 (0.6770) time: 0.1694 data: 0.0843 max mem: 9377 +Train: [68] [4300/6250] eta: 0:05:20 lr: 0.000031 grad: 0.1875 (0.2118) loss: 0.6698 (0.6770) time: 0.1565 data: 0.0757 max mem: 9377 +Train: [68] [4400/6250] eta: 0:05:04 lr: 0.000031 grad: 0.1898 (0.2116) loss: 0.6772 (0.6771) time: 0.1543 data: 0.0602 max mem: 9377 +Train: [68] [4500/6250] eta: 0:04:47 lr: 0.000031 grad: 0.1960 (0.2113) loss: 0.6709 (0.6769) time: 0.1648 data: 0.0783 max mem: 9377 +Train: [68] [4600/6250] eta: 0:04:31 lr: 0.000031 grad: 0.1889 (0.2111) loss: 0.6681 (0.6769) time: 0.1690 data: 0.0760 max mem: 9377 +Train: [68] [4700/6250] eta: 0:04:14 lr: 0.000031 grad: 0.1854 (0.2108) loss: 0.6705 (0.6769) time: 0.1608 data: 0.0703 max mem: 9377 +Train: [68] [4800/6250] eta: 0:03:57 lr: 0.000030 grad: 0.1863 (0.2105) loss: 0.6760 (0.6769) time: 0.1489 data: 0.0412 max mem: 9377 +Train: [68] [4900/6250] eta: 0:03:41 lr: 0.000030 grad: 0.1922 (0.2104) loss: 0.6730 (0.6768) time: 0.1423 data: 0.0535 max mem: 9377 +Train: [68] [5000/6250] eta: 0:03:24 lr: 0.000030 grad: 0.1849 (0.2101) loss: 0.6752 (0.6767) time: 0.1453 data: 0.0513 max mem: 9377 +Train: [68] [5100/6250] eta: 0:03:07 lr: 0.000030 grad: 0.1903 (0.2099) loss: 0.6770 (0.6768) time: 0.1691 data: 0.0750 max mem: 9377 +Train: [68] [5200/6250] eta: 0:02:51 lr: 0.000030 grad: 0.1837 (0.2097) loss: 0.6759 (0.6768) time: 0.1494 data: 0.0631 max mem: 9377 +Train: [68] [5300/6250] eta: 0:02:34 lr: 0.000030 grad: 0.1901 (0.2096) loss: 0.6775 (0.6768) time: 0.1514 data: 0.0685 max mem: 9377 +Train: [68] [5400/6250] eta: 0:02:18 lr: 0.000030 grad: 0.1920 (0.2094) loss: 0.6658 (0.6767) time: 0.1532 data: 0.0652 max mem: 9377 +Train: [68] [5500/6250] eta: 0:02:02 lr: 0.000030 grad: 0.1908 (0.2092) loss: 0.6772 (0.6766) time: 0.1580 data: 0.0707 max mem: 9377 +Train: [68] [5600/6250] eta: 0:01:45 lr: 0.000030 grad: 0.1912 (0.2090) loss: 0.6733 (0.6765) time: 0.1454 data: 0.0615 max mem: 9377 +Train: [68] [5700/6250] eta: 0:01:29 lr: 0.000030 grad: 0.1927 (0.2089) loss: 0.6647 (0.6764) time: 0.1548 data: 0.0651 max mem: 9377 +Train: [68] [5800/6250] eta: 0:01:13 lr: 0.000030 grad: 0.1922 (0.2087) loss: 0.6629 (0.6763) time: 0.1166 data: 0.0249 max mem: 9377 +Train: [68] [5900/6250] eta: 0:00:56 lr: 0.000030 grad: 0.1994 (0.2086) loss: 0.6651 (0.6761) time: 0.1284 data: 0.0418 max mem: 9377 +Train: [68] [6000/6250] eta: 0:00:40 lr: 0.000030 grad: 0.1924 (0.2084) loss: 0.6609 (0.6759) time: 0.1569 data: 0.0582 max mem: 9377 +Train: [68] [6100/6250] eta: 0:00:24 lr: 0.000030 grad: 0.1989 (0.2084) loss: 0.6611 (0.6757) time: 0.1412 data: 0.0469 max mem: 9377 +Train: [68] [6200/6250] eta: 0:00:08 lr: 0.000030 grad: 0.1923 (0.2084) loss: 0.6731 (0.6755) time: 0.1592 data: 0.0695 max mem: 9377 +Train: [68] [6249/6250] eta: 0:00:00 lr: 0.000030 grad: 0.1917 (0.2084) loss: 0.6632 (0.6754) time: 0.1631 data: 0.0679 max mem: 9377 +Train: [68] Total time: 0:17:01 (0.1634 s / it) +Averaged stats: lr: 0.000030 grad: 0.1917 (0.2084) loss: 0.6632 (0.6754) +Eval (hcp-train-subset): [68] [ 0/62] eta: 0:04:50 loss: 0.8980 (0.8980) time: 4.6871 data: 4.6133 max mem: 9377 +Eval (hcp-train-subset): [68] [61/62] eta: 0:00:00 loss: 0.9031 (0.9030) time: 0.1362 data: 0.1085 max mem: 9377 +Eval (hcp-train-subset): [68] Total time: 0:00:15 (0.2531 s / it) +Averaged stats (hcp-train-subset): loss: 0.9031 (0.9030) +Eval (hcp-val): [68] [ 0/62] eta: 0:04:04 loss: 0.9078 (0.9078) time: 3.9408 data: 3.8794 max mem: 9377 +Eval (hcp-val): [68] [61/62] eta: 0:00:00 loss: 0.9019 (0.9041) time: 0.1543 data: 0.1288 max mem: 9377 +Eval (hcp-val): [68] Total time: 0:00:15 (0.2535 s / it) +Averaged stats (hcp-val): loss: 0.9019 (0.9041) +Eval (nsd-val): [68] [ 0/62] eta: 0:06:30 loss: 0.8986 (0.8986) time: 6.3007 data: 6.2637 max mem: 9377 +Eval (nsd-val): [68] [61/62] eta: 0:00:00 loss: 0.9020 (0.9034) time: 0.1567 data: 0.1308 max mem: 9377 +Eval (nsd-val): [68] Total time: 0:00:15 (0.2537 s / it) +Averaged stats (nsd-val): loss: 0.9020 (0.9034) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [69] [ 0/6250] eta: 8:14:28 lr: 0.000030 grad: 0.3273 (0.3273) loss: 0.7238 (0.7238) time: 4.7470 data: 4.4504 max mem: 9377 +Train: [69] [ 100/6250] eta: 0:24:34 lr: 0.000030 grad: 0.3007 (0.3554) loss: 0.6878 (0.6919) time: 0.1816 data: 0.0661 max mem: 9377 +Train: [69] [ 200/6250] eta: 0:21:23 lr: 0.000030 grad: 0.2691 (0.3167) loss: 0.6827 (0.6863) time: 0.1619 data: 0.0688 max mem: 9377 +Train: [69] [ 300/6250] eta: 0:20:06 lr: 0.000030 grad: 0.2543 (0.3073) loss: 0.6855 (0.6813) time: 0.1920 data: 0.0989 max mem: 9377 +Train: [69] [ 400/6250] eta: 0:19:01 lr: 0.000030 grad: 0.2224 (0.2915) loss: 0.6748 (0.6790) time: 0.1245 data: 0.0100 max mem: 9377 +Train: [69] [ 500/6250] eta: 0:18:05 lr: 0.000030 grad: 0.2337 (0.2819) loss: 0.6680 (0.6789) time: 0.1822 data: 0.0796 max mem: 9377 +Train: [69] [ 600/6250] eta: 0:17:15 lr: 0.000030 grad: 0.2063 (0.2746) loss: 0.6703 (0.6785) time: 0.1632 data: 0.0762 max mem: 9377 +Train: [69] [ 700/6250] eta: 0:16:34 lr: 0.000030 grad: 0.2044 (0.2658) loss: 0.6702 (0.6779) time: 0.1521 data: 0.0602 max mem: 9377 +Train: [69] [ 800/6250] eta: 0:16:09 lr: 0.000030 grad: 0.2142 (0.2602) loss: 0.6589 (0.6771) time: 0.1667 data: 0.0832 max mem: 9377 +Train: [69] [ 900/6250] eta: 0:15:49 lr: 0.000030 grad: 0.1912 (0.2540) loss: 0.6778 (0.6772) time: 0.1596 data: 0.0589 max mem: 9377 +Train: [69] [1000/6250] eta: 0:15:24 lr: 0.000030 grad: 0.1966 (0.2497) loss: 0.6676 (0.6774) time: 0.1879 data: 0.0970 max mem: 9377 +Train: [69] [1100/6250] eta: 0:14:55 lr: 0.000030 grad: 0.2000 (0.2477) loss: 0.6639 (0.6771) time: 0.1216 data: 0.0381 max mem: 9377 +Train: [69] [1200/6250] eta: 0:14:37 lr: 0.000030 grad: 0.1966 (0.2444) loss: 0.6784 (0.6766) time: 0.1693 data: 0.0767 max mem: 9377 +Train: [69] [1300/6250] eta: 0:14:23 lr: 0.000030 grad: 0.2106 (0.2423) loss: 0.6722 (0.6763) time: 0.2000 data: 0.1001 max mem: 9377 +Train: [69] [1400/6250] eta: 0:14:03 lr: 0.000030 grad: 0.1957 (0.2398) loss: 0.6720 (0.6762) time: 0.1701 data: 0.0722 max mem: 9377 +Train: [69] [1500/6250] eta: 0:13:43 lr: 0.000030 grad: 0.2000 (0.2376) loss: 0.6667 (0.6759) time: 0.1763 data: 0.0830 max mem: 9377 +Train: [69] [1600/6250] eta: 0:13:19 lr: 0.000030 grad: 0.2031 (0.2361) loss: 0.6462 (0.6750) time: 0.1602 data: 0.0675 max mem: 9377 +Train: [69] [1700/6250] eta: 0:12:57 lr: 0.000030 grad: 0.2103 (0.2354) loss: 0.6518 (0.6742) time: 0.1502 data: 0.0584 max mem: 9377 +Train: [69] [1800/6250] eta: 0:12:38 lr: 0.000030 grad: 0.1986 (0.2349) loss: 0.6590 (0.6737) time: 0.1345 data: 0.0480 max mem: 9377 +Train: [69] [1900/6250] eta: 0:12:18 lr: 0.000030 grad: 0.1963 (0.2337) loss: 0.6649 (0.6732) time: 0.1263 data: 0.0424 max mem: 9377 +Train: [69] [2000/6250] eta: 0:12:00 lr: 0.000030 grad: 0.2025 (0.2323) loss: 0.6711 (0.6732) time: 0.1595 data: 0.0761 max mem: 9377 +Train: [69] [2100/6250] eta: 0:11:41 lr: 0.000029 grad: 0.2022 (0.2312) loss: 0.6479 (0.6727) time: 0.1642 data: 0.0735 max mem: 9377 +Train: [69] [2200/6250] eta: 0:11:24 lr: 0.000029 grad: 0.2035 (0.2303) loss: 0.6622 (0.6724) time: 0.1753 data: 0.1010 max mem: 9377 +Train: [69] [2300/6250] eta: 0:11:07 lr: 0.000029 grad: 0.1984 (0.2295) loss: 0.6694 (0.6720) time: 0.1772 data: 0.0979 max mem: 9377 +Train: [69] [2400/6250] eta: 0:10:48 lr: 0.000029 grad: 0.2081 (0.2286) loss: 0.6703 (0.6717) time: 0.1566 data: 0.0761 max mem: 9377 +Train: [69] [2500/6250] eta: 0:10:30 lr: 0.000029 grad: 0.2009 (0.2278) loss: 0.6672 (0.6715) time: 0.1040 data: 0.0157 max mem: 9377 +Train: [69] [2600/6250] eta: 0:10:13 lr: 0.000029 grad: 0.2068 (0.2272) loss: 0.6628 (0.6714) time: 0.1568 data: 0.0655 max mem: 9377 +Train: [69] [2700/6250] eta: 0:09:56 lr: 0.000029 grad: 0.1978 (0.2263) loss: 0.6636 (0.6712) time: 0.1665 data: 0.0733 max mem: 9377 +Train: [69] [2800/6250] eta: 0:09:40 lr: 0.000029 grad: 0.1920 (0.2255) loss: 0.6783 (0.6710) time: 0.1670 data: 0.0772 max mem: 9377 +Train: [69] [2900/6250] eta: 0:09:23 lr: 0.000029 grad: 0.2030 (0.2250) loss: 0.6691 (0.6709) time: 0.1658 data: 0.0593 max mem: 9377 +Train: [69] [3000/6250] eta: 0:09:05 lr: 0.000029 grad: 0.2039 (0.2247) loss: 0.6622 (0.6707) time: 0.1760 data: 0.0913 max mem: 9377 +Train: [69] [3100/6250] eta: 0:08:48 lr: 0.000029 grad: 0.2046 (0.2244) loss: 0.6668 (0.6705) time: 0.1429 data: 0.0446 max mem: 9377 +Train: [69] [3200/6250] eta: 0:08:30 lr: 0.000029 grad: 0.1981 (0.2241) loss: 0.6630 (0.6705) time: 0.1564 data: 0.0639 max mem: 9377 +Train: [69] [3300/6250] eta: 0:08:12 lr: 0.000029 grad: 0.1907 (0.2234) loss: 0.6716 (0.6706) time: 0.1543 data: 0.0627 max mem: 9377 +Train: [69] [3400/6250] eta: 0:07:55 lr: 0.000029 grad: 0.1958 (0.2231) loss: 0.6785 (0.6706) time: 0.1520 data: 0.0679 max mem: 9377 +Train: [69] [3500/6250] eta: 0:07:38 lr: 0.000029 grad: 0.1953 (0.2229) loss: 0.6724 (0.6707) time: 0.1481 data: 0.0501 max mem: 9377 +Train: [69] [3600/6250] eta: 0:07:21 lr: 0.000029 grad: 0.1977 (0.2223) loss: 0.6736 (0.6707) time: 0.1484 data: 0.0590 max mem: 9377 +Train: [69] [3700/6250] eta: 0:07:05 lr: 0.000029 grad: 0.2183 (0.2222) loss: 0.6649 (0.6705) time: 0.1162 data: 0.0289 max mem: 9377 +Train: [69] [3800/6250] eta: 0:06:46 lr: 0.000029 grad: 0.2059 (0.2222) loss: 0.6583 (0.6703) time: 0.1509 data: 0.0643 max mem: 9377 +Train: [69] [3900/6250] eta: 0:06:30 lr: 0.000029 grad: 0.1886 (0.2219) loss: 0.6564 (0.6702) time: 0.1685 data: 0.0751 max mem: 9377 +Train: [69] [4000/6250] eta: 0:06:13 lr: 0.000029 grad: 0.1911 (0.2213) loss: 0.6549 (0.6700) time: 0.1676 data: 0.0896 max mem: 9377 +Train: [69] [4100/6250] eta: 0:05:56 lr: 0.000029 grad: 0.1897 (0.2208) loss: 0.6652 (0.6697) time: 0.1536 data: 0.0684 max mem: 9377 +Train: [69] [4200/6250] eta: 0:05:39 lr: 0.000029 grad: 0.2012 (0.2204) loss: 0.6615 (0.6697) time: 0.1586 data: 0.0686 max mem: 9377 +Train: [69] [4300/6250] eta: 0:05:22 lr: 0.000029 grad: 0.1976 (0.2201) loss: 0.6445 (0.6694) time: 0.1563 data: 0.0665 max mem: 9377 +Train: [69] [4400/6250] eta: 0:05:05 lr: 0.000029 grad: 0.2010 (0.2198) loss: 0.6624 (0.6692) time: 0.1502 data: 0.0617 max mem: 9377 +Train: [69] [4500/6250] eta: 0:04:49 lr: 0.000029 grad: 0.1976 (0.2194) loss: 0.6496 (0.6690) time: 0.1619 data: 0.0643 max mem: 9377 +Train: [69] [4600/6250] eta: 0:04:32 lr: 0.000029 grad: 0.1948 (0.2191) loss: 0.6598 (0.6687) time: 0.1806 data: 0.0875 max mem: 9377 +Train: [69] [4700/6250] eta: 0:04:15 lr: 0.000029 grad: 0.1929 (0.2188) loss: 0.6702 (0.6686) time: 0.1707 data: 0.0835 max mem: 9377 +Train: [69] [4800/6250] eta: 0:03:58 lr: 0.000029 grad: 0.1953 (0.2187) loss: 0.6580 (0.6685) time: 0.1677 data: 0.0713 max mem: 9377 +Train: [69] [4900/6250] eta: 0:03:41 lr: 0.000029 grad: 0.2010 (0.2186) loss: 0.6482 (0.6683) time: 0.1490 data: 0.0449 max mem: 9377 +Train: [69] [5000/6250] eta: 0:03:25 lr: 0.000029 grad: 0.1978 (0.2185) loss: 0.6564 (0.6680) time: 0.1410 data: 0.0544 max mem: 9377 +Train: [69] [5100/6250] eta: 0:03:08 lr: 0.000029 grad: 0.1904 (0.2183) loss: 0.6743 (0.6679) time: 0.1720 data: 0.0812 max mem: 9377 +Train: [69] [5200/6250] eta: 0:02:51 lr: 0.000029 grad: 0.1970 (0.2182) loss: 0.6393 (0.6677) time: 0.1627 data: 0.0786 max mem: 9377 +Train: [69] [5300/6250] eta: 0:02:35 lr: 0.000029 grad: 0.1942 (0.2180) loss: 0.6490 (0.6675) time: 0.1482 data: 0.0542 max mem: 9377 +Train: [69] [5400/6250] eta: 0:02:18 lr: 0.000029 grad: 0.2013 (0.2178) loss: 0.6690 (0.6673) time: 0.1435 data: 0.0577 max mem: 9377 +Train: [69] [5500/6250] eta: 0:02:02 lr: 0.000029 grad: 0.1935 (0.2176) loss: 0.6553 (0.6673) time: 0.1426 data: 0.0460 max mem: 9377 +Train: [69] [5600/6250] eta: 0:01:46 lr: 0.000028 grad: 0.1920 (0.2177) loss: 0.6724 (0.6672) time: 0.1446 data: 0.0568 max mem: 9377 +Train: [69] [5700/6250] eta: 0:01:29 lr: 0.000028 grad: 0.2020 (0.2176) loss: 0.6589 (0.6671) time: 0.1520 data: 0.0640 max mem: 9377 +Train: [69] [5800/6250] eta: 0:01:13 lr: 0.000028 grad: 0.1979 (0.2175) loss: 0.6590 (0.6669) time: 0.1533 data: 0.0630 max mem: 9377 +Train: [69] [5900/6250] eta: 0:00:57 lr: 0.000028 grad: 0.1952 (0.2172) loss: 0.6657 (0.6669) time: 0.1315 data: 0.0342 max mem: 9377 +Train: [69] [6000/6250] eta: 0:00:40 lr: 0.000028 grad: 0.1977 (0.2170) loss: 0.6611 (0.6667) time: 0.1799 data: 0.0853 max mem: 9377 +Train: [69] [6100/6250] eta: 0:00:24 lr: 0.000028 grad: 0.1927 (0.2167) loss: 0.6566 (0.6667) time: 0.1570 data: 0.0695 max mem: 9377 +Train: [69] [6200/6250] eta: 0:00:08 lr: 0.000028 grad: 0.1974 (0.2165) loss: 0.6587 (0.6666) time: 0.1733 data: 0.0813 max mem: 9377 +Train: [69] [6249/6250] eta: 0:00:00 lr: 0.000028 grad: 0.2028 (0.2164) loss: 0.6689 (0.6666) time: 0.1637 data: 0.0801 max mem: 9377 +Train: [69] Total time: 0:17:04 (0.1639 s / it) +Averaged stats: lr: 0.000028 grad: 0.2028 (0.2164) loss: 0.6689 (0.6666) +Eval (hcp-train-subset): [69] [ 0/62] eta: 0:06:08 loss: 0.8993 (0.8993) time: 5.9468 data: 5.9143 max mem: 9377 +Eval (hcp-train-subset): [69] [61/62] eta: 0:00:00 loss: 0.9011 (0.9037) time: 0.1375 data: 0.1111 max mem: 9377 +Eval (hcp-train-subset): [69] Total time: 0:00:15 (0.2458 s / it) +Averaged stats (hcp-train-subset): loss: 0.9011 (0.9037) +Making plots (hcp-train-subset): example=41 +Eval (hcp-val): [69] [ 0/62] eta: 0:03:44 loss: 0.8969 (0.8969) time: 3.6236 data: 3.5506 max mem: 9377 +Eval (hcp-val): [69] [61/62] eta: 0:00:00 loss: 0.9042 (0.9033) time: 0.1516 data: 0.1266 max mem: 9377 +Eval (hcp-val): [69] Total time: 0:00:15 (0.2477 s / it) +Averaged stats (hcp-val): loss: 0.9042 (0.9033) +Making plots (hcp-val): example=18 +Eval (nsd-val): [69] [ 0/62] eta: 0:06:00 loss: 0.8955 (0.8955) time: 5.8174 data: 5.7863 max mem: 9377 +Eval (nsd-val): [69] [61/62] eta: 0:00:00 loss: 0.9022 (0.9056) time: 0.1121 data: 0.0868 max mem: 9377 +Eval (nsd-val): [69] Total time: 0:00:14 (0.2319 s / it) +Averaged stats (nsd-val): loss: 0.9022 (0.9056) +Making plots (nsd-val): example=16 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-00069.pth +Train: [70] [ 0/6250] eta: 12:50:17 lr: 0.000028 grad: 0.2984 (0.2984) loss: 0.6665 (0.6665) time: 7.3949 data: 7.2878 max mem: 9377 +Train: [70] [ 100/6250] eta: 0:22:55 lr: 0.000028 grad: 0.3074 (0.3592) loss: 0.6709 (0.6858) time: 0.1437 data: 0.0417 max mem: 9377 +Train: [70] [ 200/6250] eta: 0:19:31 lr: 0.000028 grad: 0.2836 (0.3354) loss: 0.6876 (0.6797) time: 0.1583 data: 0.0484 max mem: 9377 +Train: [70] [ 300/6250] eta: 0:18:09 lr: 0.000028 grad: 0.2573 (0.3172) loss: 0.6748 (0.6784) time: 0.1460 data: 0.0389 max mem: 9377 +Train: [70] [ 400/6250] eta: 0:17:14 lr: 0.000028 grad: 0.2211 (0.2966) loss: 0.6691 (0.6764) time: 0.1718 data: 0.0631 max mem: 9377 +Train: [70] [ 500/6250] eta: 0:16:49 lr: 0.000028 grad: 0.2573 (0.2872) loss: 0.6531 (0.6742) time: 0.1380 data: 0.0342 max mem: 9377 +Train: [70] [ 600/6250] eta: 0:16:22 lr: 0.000028 grad: 0.2145 (0.2804) loss: 0.6843 (0.6745) time: 0.1710 data: 0.0661 max mem: 9377 +Train: [70] [ 700/6250] eta: 0:16:05 lr: 0.000028 grad: 0.2027 (0.2718) loss: 0.6701 (0.6752) time: 0.2095 data: 0.1084 max mem: 9377 +Train: [70] [ 800/6250] eta: 0:15:51 lr: 0.000028 grad: 0.2004 (0.2662) loss: 0.6634 (0.6750) time: 0.1655 data: 0.0785 max mem: 9377 +Train: [70] [ 900/6250] eta: 0:15:33 lr: 0.000028 grad: 0.1951 (0.2601) loss: 0.6620 (0.6747) time: 0.1597 data: 0.0676 max mem: 9377 +Train: [70] [1000/6250] eta: 0:15:10 lr: 0.000028 grad: 0.1980 (0.2554) loss: 0.6622 (0.6741) time: 0.1499 data: 0.0667 max mem: 9377 +Train: [70] [1100/6250] eta: 0:14:46 lr: 0.000028 grad: 0.1954 (0.2521) loss: 0.6798 (0.6741) time: 0.1791 data: 0.0959 max mem: 9377 +Train: [70] [1200/6250] eta: 0:14:34 lr: 0.000028 grad: 0.1946 (0.2482) loss: 0.6718 (0.6741) time: 0.2321 data: 0.1476 max mem: 9377 +Train: [70] [1300/6250] eta: 0:14:13 lr: 0.000028 grad: 0.1899 (0.2448) loss: 0.6772 (0.6740) time: 0.1732 data: 0.0803 max mem: 9377 +Train: [70] [1400/6250] eta: 0:13:54 lr: 0.000028 grad: 0.1969 (0.2424) loss: 0.6646 (0.6736) time: 0.1697 data: 0.0803 max mem: 9377 +Train: [70] [1500/6250] eta: 0:13:35 lr: 0.000028 grad: 0.1930 (0.2402) loss: 0.6793 (0.6734) time: 0.1311 data: 0.0368 max mem: 9377 +Train: [70] [1600/6250] eta: 0:13:16 lr: 0.000028 grad: 0.2004 (0.2379) loss: 0.6619 (0.6731) time: 0.1475 data: 0.0510 max mem: 9377 +Train: [70] [1700/6250] eta: 0:12:53 lr: 0.000028 grad: 0.1961 (0.2361) loss: 0.6761 (0.6733) time: 0.1634 data: 0.0684 max mem: 9377 +Train: [70] [1800/6250] eta: 0:12:31 lr: 0.000028 grad: 0.1926 (0.2344) loss: 0.6628 (0.6729) time: 0.1257 data: 0.0352 max mem: 9377 +Train: [70] [1900/6250] eta: 0:12:13 lr: 0.000028 grad: 0.1912 (0.2329) loss: 0.6792 (0.6730) time: 0.1552 data: 0.0773 max mem: 9377 +Train: [70] [2000/6250] eta: 0:11:55 lr: 0.000028 grad: 0.1946 (0.2315) loss: 0.6623 (0.6728) time: 0.1568 data: 0.0710 max mem: 9377 +Train: [70] [2100/6250] eta: 0:11:34 lr: 0.000028 grad: 0.1861 (0.2302) loss: 0.6562 (0.6725) time: 0.1454 data: 0.0593 max mem: 9377 +Train: [70] [2200/6250] eta: 0:11:18 lr: 0.000028 grad: 0.1925 (0.2291) loss: 0.6687 (0.6723) time: 0.2202 data: 0.1489 max mem: 9377 +Train: [70] [2300/6250] eta: 0:11:00 lr: 0.000028 grad: 0.1921 (0.2284) loss: 0.6759 (0.6722) time: 0.1536 data: 0.0704 max mem: 9377 +Train: [70] [2400/6250] eta: 0:10:43 lr: 0.000028 grad: 0.1921 (0.2272) loss: 0.6759 (0.6723) time: 0.1555 data: 0.0692 max mem: 9377 +Train: [70] [2500/6250] eta: 0:10:25 lr: 0.000028 grad: 0.2001 (0.2267) loss: 0.6605 (0.6720) time: 0.1369 data: 0.0528 max mem: 9377 +Train: [70] [2600/6250] eta: 0:10:06 lr: 0.000028 grad: 0.2118 (0.2266) loss: 0.6719 (0.6719) time: 0.1168 data: 0.0288 max mem: 9377 +Train: [70] [2700/6250] eta: 0:09:50 lr: 0.000028 grad: 0.1958 (0.2258) loss: 0.6682 (0.6717) time: 0.1610 data: 0.0799 max mem: 9377 +Train: [70] [2800/6250] eta: 0:09:33 lr: 0.000028 grad: 0.2001 (0.2251) loss: 0.6639 (0.6715) time: 0.1591 data: 0.0691 max mem: 9377 +Train: [70] [2900/6250] eta: 0:09:16 lr: 0.000028 grad: 0.1929 (0.2245) loss: 0.6688 (0.6713) time: 0.1702 data: 0.0902 max mem: 9377 +Train: [70] [3000/6250] eta: 0:08:57 lr: 0.000027 grad: 0.1905 (0.2238) loss: 0.6657 (0.6712) time: 0.1342 data: 0.0455 max mem: 9377 +Train: [70] [3100/6250] eta: 0:08:39 lr: 0.000027 grad: 0.1946 (0.2232) loss: 0.6670 (0.6709) time: 0.1611 data: 0.0689 max mem: 9377 +Train: [70] [3200/6250] eta: 0:08:21 lr: 0.000027 grad: 0.1913 (0.2225) loss: 0.6658 (0.6708) time: 0.1670 data: 0.0781 max mem: 9377 +Train: [70] [3300/6250] eta: 0:08:03 lr: 0.000027 grad: 0.1974 (0.2222) loss: 0.6660 (0.6708) time: 0.1358 data: 0.0485 max mem: 9377 +Train: [70] [3400/6250] eta: 0:07:45 lr: 0.000027 grad: 0.1918 (0.2216) loss: 0.6705 (0.6707) time: 0.1649 data: 0.0802 max mem: 9377 +Train: [70] [3500/6250] eta: 0:07:28 lr: 0.000027 grad: 0.1994 (0.2213) loss: 0.6727 (0.6707) time: 0.1577 data: 0.0696 max mem: 9377 +Train: [70] [3600/6250] eta: 0:07:14 lr: 0.000027 grad: 0.1902 (0.2207) loss: 0.6726 (0.6708) time: 0.1142 data: 0.0079 max mem: 9377 +Train: [70] [3700/6250] eta: 0:06:56 lr: 0.000027 grad: 0.1933 (0.2202) loss: 0.6828 (0.6709) time: 0.1267 data: 0.0471 max mem: 9377 +Train: [70] [3800/6250] eta: 0:06:40 lr: 0.000027 grad: 0.1899 (0.2196) loss: 0.6657 (0.6709) time: 0.1559 data: 0.0751 max mem: 9377 +Train: [70] [3900/6250] eta: 0:06:24 lr: 0.000027 grad: 0.1889 (0.2190) loss: 0.6583 (0.6709) time: 0.1821 data: 0.1013 max mem: 9377 +Train: [70] [4000/6250] eta: 0:06:07 lr: 0.000027 grad: 0.1930 (0.2188) loss: 0.6565 (0.6708) time: 0.1023 data: 0.0129 max mem: 9377 +Train: [70] [4100/6250] eta: 0:05:51 lr: 0.000027 grad: 0.1987 (0.2189) loss: 0.6659 (0.6707) time: 0.1542 data: 0.0738 max mem: 9377 +Train: [70] [4200/6250] eta: 0:05:35 lr: 0.000027 grad: 0.1849 (0.2187) loss: 0.6562 (0.6708) time: 0.1754 data: 0.0947 max mem: 9377 +Train: [70] [4300/6250] eta: 0:05:19 lr: 0.000027 grad: 0.1954 (0.2189) loss: 0.6768 (0.6709) time: 0.1470 data: 0.0593 max mem: 9377 +Train: [70] [4400/6250] eta: 0:05:02 lr: 0.000027 grad: 0.1903 (0.2184) loss: 0.6787 (0.6711) time: 0.1735 data: 0.0887 max mem: 9377 +Train: [70] [4500/6250] eta: 0:04:46 lr: 0.000027 grad: 0.1991 (0.2182) loss: 0.6639 (0.6712) time: 0.1632 data: 0.0731 max mem: 9377 +Train: [70] [4600/6250] eta: 0:04:30 lr: 0.000027 grad: 0.1941 (0.2181) loss: 0.6707 (0.6713) time: 0.1658 data: 0.0711 max mem: 9377 +Train: [70] [4700/6250] eta: 0:04:14 lr: 0.000027 grad: 0.1932 (0.2178) loss: 0.6649 (0.6713) time: 0.1556 data: 0.0704 max mem: 9377 +Train: [70] [4800/6250] eta: 0:03:57 lr: 0.000027 grad: 0.1962 (0.2175) loss: 0.6760 (0.6713) time: 0.1561 data: 0.0601 max mem: 9377 +Train: [70] [4900/6250] eta: 0:03:40 lr: 0.000027 grad: 0.1936 (0.2174) loss: 0.6683 (0.6713) time: 0.1373 data: 0.0482 max mem: 9377 +Train: [70] [5000/6250] eta: 0:03:24 lr: 0.000027 grad: 0.1922 (0.2175) loss: 0.6828 (0.6713) time: 0.2256 data: 0.0647 max mem: 9377 +Train: [70] [5100/6250] eta: 0:03:08 lr: 0.000027 grad: 0.2037 (0.2171) loss: 0.6695 (0.6714) time: 0.1612 data: 0.0756 max mem: 9377 +Train: [70] [5200/6250] eta: 0:02:51 lr: 0.000027 grad: 0.1891 (0.2168) loss: 0.6728 (0.6713) time: 0.2012 data: 0.1092 max mem: 9377 +Train: [70] [5300/6250] eta: 0:02:35 lr: 0.000027 grad: 0.1952 (0.2167) loss: 0.6643 (0.6712) time: 0.1539 data: 0.0617 max mem: 9377 +Train: [70] [5400/6250] eta: 0:02:18 lr: 0.000027 grad: 0.1984 (0.2164) loss: 0.6490 (0.6711) time: 0.1600 data: 0.0732 max mem: 9377 +Train: [70] [5500/6250] eta: 0:02:02 lr: 0.000027 grad: 0.2001 (0.2165) loss: 0.6675 (0.6710) time: 0.1405 data: 0.0423 max mem: 9377 +Train: [70] [5600/6250] eta: 0:01:46 lr: 0.000027 grad: 0.1955 (0.2164) loss: 0.6603 (0.6708) time: 0.1922 data: 0.1013 max mem: 9377 +Train: [70] [5700/6250] eta: 0:01:29 lr: 0.000027 grad: 0.1987 (0.2162) loss: 0.6607 (0.6707) time: 0.1588 data: 0.0712 max mem: 9377 +Train: [70] [5800/6250] eta: 0:01:13 lr: 0.000027 grad: 0.1940 (0.2159) loss: 0.6801 (0.6707) time: 0.1308 data: 0.0325 max mem: 9377 +Train: [70] [5900/6250] eta: 0:00:56 lr: 0.000027 grad: 0.2027 (0.2159) loss: 0.6527 (0.6707) time: 0.1575 data: 0.0671 max mem: 9377 +Train: [70] [6000/6250] eta: 0:00:40 lr: 0.000027 grad: 0.1932 (0.2158) loss: 0.6705 (0.6707) time: 0.1383 data: 0.0460 max mem: 9377 +Train: [70] [6100/6250] eta: 0:00:24 lr: 0.000027 grad: 0.1959 (0.2158) loss: 0.6683 (0.6707) time: 0.1710 data: 0.0803 max mem: 9377 +Train: [70] [6200/6250] eta: 0:00:08 lr: 0.000027 grad: 0.1947 (0.2157) loss: 0.6775 (0.6706) time: 0.1478 data: 0.0576 max mem: 9377 +Train: [70] [6249/6250] eta: 0:00:00 lr: 0.000027 grad: 0.2001 (0.2157) loss: 0.6777 (0.6706) time: 0.1455 data: 0.0524 max mem: 9377 +Train: [70] Total time: 0:16:59 (0.1632 s / it) +Averaged stats: lr: 0.000027 grad: 0.2001 (0.2157) loss: 0.6777 (0.6706) +Eval (hcp-train-subset): [70] [ 0/62] eta: 0:04:08 loss: 0.8929 (0.8929) time: 4.0059 data: 3.9016 max mem: 9377 +Eval (hcp-train-subset): [70] [61/62] eta: 0:00:00 loss: 0.9046 (0.9050) time: 0.1630 data: 0.1373 max mem: 9377 +Eval (hcp-train-subset): [70] Total time: 0:00:16 (0.2600 s / it) +Averaged stats (hcp-train-subset): loss: 0.9046 (0.9050) +Eval (hcp-val): [70] [ 0/62] eta: 0:05:54 loss: 0.9032 (0.9032) time: 5.7098 data: 5.6779 max mem: 9377 +Eval (hcp-val): [70] [61/62] eta: 0:00:00 loss: 0.9029 (0.9057) time: 0.1482 data: 0.1222 max mem: 9377 +Eval (hcp-val): [70] Total time: 0:00:16 (0.2705 s / it) +Averaged stats (hcp-val): loss: 0.9029 (0.9057) +Eval (nsd-val): [70] [ 0/62] eta: 0:05:55 loss: 0.9141 (0.9141) time: 5.7417 data: 5.7043 max mem: 9377 +Eval (nsd-val): [70] [61/62] eta: 0:00:00 loss: 0.9076 (0.9116) time: 0.1590 data: 0.1316 max mem: 9377 +Eval (nsd-val): [70] Total time: 0:00:16 (0.2610 s / it) +Averaged stats (nsd-val): loss: 0.9076 (0.9116) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [71] [ 0/6250] eta: 9:09:07 lr: 0.000027 grad: 0.2397 (0.2397) loss: 0.7936 (0.7936) time: 5.2715 data: 4.9656 max mem: 9377 +Train: [71] [ 100/6250] eta: 0:25:02 lr: 0.000027 grad: 0.3434 (0.3766) loss: 0.6861 (0.6818) time: 0.1949 data: 0.0824 max mem: 9377 +Train: [71] [ 200/6250] eta: 0:21:25 lr: 0.000027 grad: 0.2939 (0.3609) loss: 0.6675 (0.6774) time: 0.1803 data: 0.0702 max mem: 9377 +Train: [71] [ 300/6250] eta: 0:20:17 lr: 0.000027 grad: 0.2657 (0.3403) loss: 0.6291 (0.6726) time: 0.2082 data: 0.0996 max mem: 9377 +Train: [71] [ 400/6250] eta: 0:19:00 lr: 0.000026 grad: 0.2393 (0.3203) loss: 0.6618 (0.6712) time: 0.1541 data: 0.0578 max mem: 9377 +Train: [71] [ 500/6250] eta: 0:18:12 lr: 0.000026 grad: 0.2108 (0.3053) loss: 0.6650 (0.6705) time: 0.1815 data: 0.0806 max mem: 9377 +Train: [71] [ 600/6250] eta: 0:17:26 lr: 0.000026 grad: 0.2110 (0.2928) loss: 0.6422 (0.6696) time: 0.1595 data: 0.0647 max mem: 9377 +Train: [71] [ 700/6250] eta: 0:16:56 lr: 0.000026 grad: 0.2081 (0.2828) loss: 0.6578 (0.6686) time: 0.1815 data: 0.0942 max mem: 9377 +Train: [71] [ 800/6250] eta: 0:16:27 lr: 0.000026 grad: 0.2010 (0.2734) loss: 0.6638 (0.6675) time: 0.1596 data: 0.0682 max mem: 9377 +Train: [71] [ 900/6250] eta: 0:16:11 lr: 0.000026 grad: 0.2007 (0.2664) loss: 0.6674 (0.6673) time: 0.2001 data: 0.1126 max mem: 9377 +Train: [71] [1000/6250] eta: 0:15:47 lr: 0.000026 grad: 0.1949 (0.2611) loss: 0.6785 (0.6674) time: 0.1649 data: 0.0644 max mem: 9377 +Train: [71] [1100/6250] eta: 0:15:22 lr: 0.000026 grad: 0.1997 (0.2562) loss: 0.6416 (0.6669) time: 0.1599 data: 0.0683 max mem: 9377 +Train: [71] [1200/6250] eta: 0:15:00 lr: 0.000026 grad: 0.1965 (0.2521) loss: 0.6676 (0.6673) time: 0.1814 data: 0.0954 max mem: 9377 +Train: [71] [1300/6250] eta: 0:14:41 lr: 0.000026 grad: 0.1967 (0.2490) loss: 0.6603 (0.6668) time: 0.1683 data: 0.0754 max mem: 9377 +Train: [71] [1400/6250] eta: 0:14:19 lr: 0.000026 grad: 0.1941 (0.2466) loss: 0.6676 (0.6669) time: 0.1502 data: 0.0557 max mem: 9377 +Train: [71] [1500/6250] eta: 0:13:58 lr: 0.000026 grad: 0.1971 (0.2434) loss: 0.6647 (0.6669) time: 0.1799 data: 0.0890 max mem: 9377 +Train: [71] [1600/6250] eta: 0:13:36 lr: 0.000026 grad: 0.1926 (0.2416) loss: 0.6682 (0.6669) time: 0.1718 data: 0.0820 max mem: 9377 +Train: [71] [1700/6250] eta: 0:13:15 lr: 0.000026 grad: 0.1952 (0.2398) loss: 0.6735 (0.6672) time: 0.2045 data: 0.1140 max mem: 9377 +Train: [71] [1800/6250] eta: 0:12:53 lr: 0.000026 grad: 0.1870 (0.2379) loss: 0.6775 (0.6675) time: 0.1631 data: 0.0712 max mem: 9377 +Train: [71] [1900/6250] eta: 0:12:31 lr: 0.000026 grad: 0.1894 (0.2362) loss: 0.6750 (0.6677) time: 0.1498 data: 0.0456 max mem: 9377 +Train: [71] [2000/6250] eta: 0:12:11 lr: 0.000026 grad: 0.1919 (0.2343) loss: 0.6682 (0.6679) time: 0.1599 data: 0.0743 max mem: 9377 +Train: [71] [2100/6250] eta: 0:11:52 lr: 0.000026 grad: 0.1963 (0.2329) loss: 0.6698 (0.6679) time: 0.1907 data: 0.1058 max mem: 9377 +Train: [71] [2200/6250] eta: 0:11:31 lr: 0.000026 grad: 0.1945 (0.2315) loss: 0.6768 (0.6679) time: 0.1602 data: 0.0667 max mem: 9377 +Train: [71] [2300/6250] eta: 0:11:11 lr: 0.000026 grad: 0.1923 (0.2304) loss: 0.6525 (0.6679) time: 0.1469 data: 0.0656 max mem: 9377 +Train: [71] [2400/6250] eta: 0:10:55 lr: 0.000026 grad: 0.1943 (0.2294) loss: 0.6614 (0.6678) time: 0.1535 data: 0.0701 max mem: 9377 +Train: [71] [2500/6250] eta: 0:10:38 lr: 0.000026 grad: 0.2014 (0.2284) loss: 0.6660 (0.6677) time: 0.1654 data: 0.0742 max mem: 9377 +Train: [71] [2600/6250] eta: 0:10:19 lr: 0.000026 grad: 0.1973 (0.2277) loss: 0.6629 (0.6678) time: 0.1603 data: 0.0756 max mem: 9377 +Train: [71] [2700/6250] eta: 0:10:00 lr: 0.000026 grad: 0.1986 (0.2268) loss: 0.6628 (0.6678) time: 0.1778 data: 0.0954 max mem: 9377 +Train: [71] [2800/6250] eta: 0:09:43 lr: 0.000026 grad: 0.1941 (0.2264) loss: 0.6707 (0.6678) time: 0.1658 data: 0.0754 max mem: 9377 +Train: [71] [2900/6250] eta: 0:09:26 lr: 0.000026 grad: 0.1970 (0.2262) loss: 0.6713 (0.6677) time: 0.1513 data: 0.0598 max mem: 9377 +Train: [71] [3000/6250] eta: 0:09:08 lr: 0.000026 grad: 0.2027 (0.2257) loss: 0.6538 (0.6677) time: 0.1647 data: 0.0764 max mem: 9377 +Train: [71] [3100/6250] eta: 0:08:49 lr: 0.000026 grad: 0.1973 (0.2253) loss: 0.6614 (0.6676) time: 0.1510 data: 0.0620 max mem: 9377 +Train: [71] [3200/6250] eta: 0:08:31 lr: 0.000026 grad: 0.1987 (0.2246) loss: 0.6683 (0.6675) time: 0.1303 data: 0.0433 max mem: 9377 +Train: [71] [3300/6250] eta: 0:08:13 lr: 0.000026 grad: 0.1954 (0.2244) loss: 0.6658 (0.6675) time: 0.1526 data: 0.0575 max mem: 9377 +Train: [71] [3400/6250] eta: 0:07:54 lr: 0.000026 grad: 0.1880 (0.2239) loss: 0.6690 (0.6676) time: 0.1338 data: 0.0291 max mem: 9377 +Train: [71] [3500/6250] eta: 0:07:36 lr: 0.000026 grad: 0.1962 (0.2236) loss: 0.6601 (0.6678) time: 0.1341 data: 0.0334 max mem: 9377 +Train: [71] [3600/6250] eta: 0:07:18 lr: 0.000026 grad: 0.2003 (0.2230) loss: 0.6769 (0.6679) time: 0.1560 data: 0.0648 max mem: 9377 +Train: [71] [3700/6250] eta: 0:07:00 lr: 0.000026 grad: 0.1882 (0.2224) loss: 0.6675 (0.6680) time: 0.1425 data: 0.0473 max mem: 9377 +Train: [71] [3800/6250] eta: 0:06:43 lr: 0.000026 grad: 0.1928 (0.2221) loss: 0.6739 (0.6681) time: 0.1693 data: 0.0821 max mem: 9377 +Train: [71] [3900/6250] eta: 0:06:26 lr: 0.000026 grad: 0.1898 (0.2219) loss: 0.6702 (0.6682) time: 0.1646 data: 0.0763 max mem: 9377 +Train: [71] [4000/6250] eta: 0:06:09 lr: 0.000026 grad: 0.1911 (0.2215) loss: 0.6635 (0.6684) time: 0.1523 data: 0.0615 max mem: 9377 +Train: [71] [4100/6250] eta: 0:05:52 lr: 0.000026 grad: 0.1907 (0.2213) loss: 0.6626 (0.6684) time: 0.1695 data: 0.0821 max mem: 9377 +Train: [71] [4200/6250] eta: 0:05:37 lr: 0.000025 grad: 0.1863 (0.2210) loss: 0.6802 (0.6685) time: 0.1868 data: 0.1063 max mem: 9377 +Train: [71] [4300/6250] eta: 0:05:21 lr: 0.000025 grad: 0.1924 (0.2206) loss: 0.6648 (0.6685) time: 0.1716 data: 0.0775 max mem: 9377 +Train: [71] [4400/6250] eta: 0:05:04 lr: 0.000025 grad: 0.1935 (0.2203) loss: 0.6676 (0.6686) time: 0.1380 data: 0.0486 max mem: 9377 +Train: [71] [4500/6250] eta: 0:04:48 lr: 0.000025 grad: 0.1905 (0.2199) loss: 0.6613 (0.6686) time: 0.1817 data: 0.0965 max mem: 9377 +Train: [71] [4600/6250] eta: 0:04:31 lr: 0.000025 grad: 0.1931 (0.2197) loss: 0.6648 (0.6686) time: 0.1683 data: 0.0701 max mem: 9377 +Train: [71] [4700/6250] eta: 0:04:15 lr: 0.000025 grad: 0.1948 (0.2195) loss: 0.6579 (0.6685) time: 0.1770 data: 0.0816 max mem: 9377 +Train: [71] [4800/6250] eta: 0:03:58 lr: 0.000025 grad: 0.1885 (0.2191) loss: 0.6774 (0.6686) time: 0.1682 data: 0.0816 max mem: 9377 +Train: [71] [4900/6250] eta: 0:03:41 lr: 0.000025 grad: 0.1931 (0.2188) loss: 0.6623 (0.6686) time: 0.1531 data: 0.0688 max mem: 9377 +Train: [71] [5000/6250] eta: 0:03:25 lr: 0.000025 grad: 0.1941 (0.2185) loss: 0.6747 (0.6686) time: 0.1664 data: 0.0720 max mem: 9377 +Train: [71] [5100/6250] eta: 0:03:08 lr: 0.000025 grad: 0.1899 (0.2182) loss: 0.6769 (0.6687) time: 0.1611 data: 0.0692 max mem: 9377 +Train: [71] [5200/6250] eta: 0:02:51 lr: 0.000025 grad: 0.1888 (0.2179) loss: 0.6747 (0.6687) time: 0.1498 data: 0.0660 max mem: 9377 +Train: [71] [5300/6250] eta: 0:02:35 lr: 0.000025 grad: 0.1952 (0.2177) loss: 0.6592 (0.6687) time: 0.1468 data: 0.0659 max mem: 9377 +Train: [71] [5400/6250] eta: 0:02:18 lr: 0.000025 grad: 0.1968 (0.2176) loss: 0.6669 (0.6686) time: 0.1582 data: 0.0710 max mem: 9377 +Train: [71] [5500/6250] eta: 0:02:01 lr: 0.000025 grad: 0.1964 (0.2174) loss: 0.6708 (0.6686) time: 0.1579 data: 0.0772 max mem: 9377 +Train: [71] [5600/6250] eta: 0:01:45 lr: 0.000025 grad: 0.1943 (0.2173) loss: 0.6688 (0.6686) time: 0.1687 data: 0.0804 max mem: 9377 +Train: [71] [5700/6250] eta: 0:01:29 lr: 0.000025 grad: 0.1962 (0.2171) loss: 0.6666 (0.6685) time: 0.1536 data: 0.0572 max mem: 9377 +Train: [71] [5800/6250] eta: 0:01:12 lr: 0.000025 grad: 0.1997 (0.2171) loss: 0.6642 (0.6687) time: 0.1646 data: 0.0711 max mem: 9377 +Train: [71] [5900/6250] eta: 0:00:56 lr: 0.000025 grad: 0.1988 (0.2171) loss: 0.6715 (0.6687) time: 0.1560 data: 0.0615 max mem: 9377 +Train: [71] [6000/6250] eta: 0:00:40 lr: 0.000025 grad: 0.1944 (0.2171) loss: 0.6867 (0.6688) time: 0.1524 data: 0.0660 max mem: 9377 +Train: [71] [6100/6250] eta: 0:00:24 lr: 0.000025 grad: 0.1954 (0.2170) loss: 0.6617 (0.6689) time: 0.1623 data: 0.0715 max mem: 9377 +Train: [71] [6200/6250] eta: 0:00:08 lr: 0.000025 grad: 0.1956 (0.2168) loss: 0.6847 (0.6691) time: 0.1465 data: 0.0596 max mem: 9377 +Train: [71] [6249/6250] eta: 0:00:00 lr: 0.000025 grad: 0.1943 (0.2167) loss: 0.6656 (0.6690) time: 0.1558 data: 0.0706 max mem: 9377 +Train: [71] Total time: 0:16:59 (0.1631 s / it) +Averaged stats: lr: 0.000025 grad: 0.1943 (0.2167) loss: 0.6656 (0.6690) +Eval (hcp-train-subset): [71] [ 0/62] eta: 0:04:31 loss: 0.8934 (0.8934) time: 4.3805 data: 4.2729 max mem: 9377 +Eval (hcp-train-subset): [71] [61/62] eta: 0:00:00 loss: 0.9032 (0.9044) time: 0.1493 data: 0.1238 max mem: 9377 +Eval (hcp-train-subset): [71] Total time: 0:00:15 (0.2454 s / it) +Averaged stats (hcp-train-subset): loss: 0.9032 (0.9044) +Eval (hcp-val): [71] [ 0/62] eta: 0:05:00 loss: 0.9010 (0.9010) time: 4.8445 data: 4.7625 max mem: 9377 +Eval (hcp-val): [71] [61/62] eta: 0:00:00 loss: 0.9001 (0.9038) time: 0.1265 data: 0.1013 max mem: 9377 +Eval (hcp-val): [71] Total time: 0:00:15 (0.2542 s / it) +Averaged stats (hcp-val): loss: 0.9001 (0.9038) +Eval (nsd-val): [71] [ 0/62] eta: 0:06:15 loss: 0.9023 (0.9023) time: 6.0590 data: 6.0267 max mem: 9377 +Eval (nsd-val): [71] [61/62] eta: 0:00:00 loss: 0.9001 (0.9042) time: 0.1427 data: 0.1170 max mem: 9377 +Eval (nsd-val): [71] Total time: 0:00:15 (0.2478 s / it) +Averaged stats (nsd-val): loss: 0.9001 (0.9042) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [72] [ 0/6250] eta: 11:28:35 lr: 0.000025 grad: 0.2420 (0.2420) loss: 0.7056 (0.7056) time: 6.6104 data: 6.5140 max mem: 9377 +Train: [72] [ 100/6250] eta: 0:22:59 lr: 0.000025 grad: 0.3081 (0.3315) loss: 0.6759 (0.6963) time: 0.1755 data: 0.0579 max mem: 9377 +Train: [72] [ 200/6250] eta: 0:20:13 lr: 0.000025 grad: 0.2805 (0.3129) loss: 0.6378 (0.6745) time: 0.1697 data: 0.0636 max mem: 9377 +Train: [72] [ 300/6250] eta: 0:19:12 lr: 0.000025 grad: 0.2657 (0.3010) loss: 0.6516 (0.6651) time: 0.1738 data: 0.0626 max mem: 9377 +Train: [72] [ 400/6250] eta: 0:18:16 lr: 0.000025 grad: 0.2270 (0.2865) loss: 0.6695 (0.6635) time: 0.1817 data: 0.0838 max mem: 9377 +Train: [72] [ 500/6250] eta: 0:17:32 lr: 0.000025 grad: 0.2143 (0.2742) loss: 0.6686 (0.6645) time: 0.1493 data: 0.0466 max mem: 9377 +Train: [72] [ 600/6250] eta: 0:17:02 lr: 0.000025 grad: 0.2056 (0.2655) loss: 0.6559 (0.6645) time: 0.1890 data: 0.0939 max mem: 9377 +Train: [72] [ 700/6250] eta: 0:16:21 lr: 0.000025 grad: 0.1830 (0.2576) loss: 0.6839 (0.6653) time: 0.1541 data: 0.0548 max mem: 9377 +Train: [72] [ 800/6250] eta: 0:15:56 lr: 0.000025 grad: 0.2049 (0.2525) loss: 0.6463 (0.6650) time: 0.1729 data: 0.0777 max mem: 9377 +Train: [72] [ 900/6250] eta: 0:15:39 lr: 0.000025 grad: 0.1987 (0.2481) loss: 0.6719 (0.6652) time: 0.1744 data: 0.0911 max mem: 9377 +Train: [72] [1000/6250] eta: 0:15:15 lr: 0.000025 grad: 0.1966 (0.2450) loss: 0.6595 (0.6656) time: 0.1770 data: 0.0890 max mem: 9377 +Train: [72] [1100/6250] eta: 0:14:55 lr: 0.000025 grad: 0.1983 (0.2414) loss: 0.6629 (0.6657) time: 0.1620 data: 0.0696 max mem: 9377 +Train: [72] [1200/6250] eta: 0:14:28 lr: 0.000025 grad: 0.1953 (0.2384) loss: 0.6603 (0.6658) time: 0.1531 data: 0.0609 max mem: 9377 +Train: [72] [1300/6250] eta: 0:14:07 lr: 0.000025 grad: 0.2005 (0.2370) loss: 0.6694 (0.6658) time: 0.1542 data: 0.0605 max mem: 9377 +Train: [72] [1400/6250] eta: 0:13:44 lr: 0.000025 grad: 0.1989 (0.2361) loss: 0.6548 (0.6659) time: 0.1527 data: 0.0606 max mem: 9377 +Train: [72] [1500/6250] eta: 0:13:25 lr: 0.000025 grad: 0.2012 (0.2346) loss: 0.6618 (0.6661) time: 0.1667 data: 0.0844 max mem: 9377 +Train: [72] [1600/6250] eta: 0:13:03 lr: 0.000025 grad: 0.1998 (0.2328) loss: 0.6544 (0.6662) time: 0.1659 data: 0.0635 max mem: 9377 +Train: [72] [1700/6250] eta: 0:12:42 lr: 0.000024 grad: 0.2011 (0.2319) loss: 0.6732 (0.6666) time: 0.1410 data: 0.0516 max mem: 9377 +Train: [72] [1800/6250] eta: 0:12:23 lr: 0.000024 grad: 0.1910 (0.2304) loss: 0.6657 (0.6668) time: 0.1605 data: 0.0685 max mem: 9377 +Train: [72] [1900/6250] eta: 0:12:02 lr: 0.000024 grad: 0.1974 (0.2294) loss: 0.6763 (0.6673) time: 0.1269 data: 0.0283 max mem: 9377 +Train: [72] [2000/6250] eta: 0:11:44 lr: 0.000024 grad: 0.2082 (0.2282) loss: 0.6704 (0.6676) time: 0.1617 data: 0.0732 max mem: 9377 +Train: [72] [2100/6250] eta: 0:11:29 lr: 0.000024 grad: 0.1918 (0.2269) loss: 0.6717 (0.6677) time: 0.1557 data: 0.0624 max mem: 9377 +Train: [72] [2200/6250] eta: 0:11:13 lr: 0.000024 grad: 0.1943 (0.2258) loss: 0.6653 (0.6678) time: 0.1789 data: 0.0993 max mem: 9377 +Train: [72] [2300/6250] eta: 0:10:57 lr: 0.000024 grad: 0.1981 (0.2248) loss: 0.6674 (0.6682) time: 0.1743 data: 0.1016 max mem: 9377 +Train: [72] [2400/6250] eta: 0:10:41 lr: 0.000024 grad: 0.1891 (0.2245) loss: 0.6744 (0.6686) time: 0.2098 data: 0.1317 max mem: 9377 +Train: [72] [2500/6250] eta: 0:10:21 lr: 0.000024 grad: 0.1917 (0.2238) loss: 0.6723 (0.6687) time: 0.1439 data: 0.0570 max mem: 9377 +Train: [72] [2600/6250] eta: 0:10:04 lr: 0.000024 grad: 0.1859 (0.2228) loss: 0.6779 (0.6690) time: 0.1574 data: 0.0739 max mem: 9377 +Train: [72] [2700/6250] eta: 0:09:47 lr: 0.000024 grad: 0.1940 (0.2219) loss: 0.6724 (0.6693) time: 0.1428 data: 0.0645 max mem: 9377 +Train: [72] [2800/6250] eta: 0:09:30 lr: 0.000024 grad: 0.1993 (0.2218) loss: 0.6635 (0.6695) time: 0.1647 data: 0.0881 max mem: 9377 +Train: [72] [2900/6250] eta: 0:09:13 lr: 0.000024 grad: 0.1914 (0.2215) loss: 0.6734 (0.6696) time: 0.1634 data: 0.0776 max mem: 9377 +Train: [72] [3000/6250] eta: 0:08:57 lr: 0.000024 grad: 0.1915 (0.2214) loss: 0.6770 (0.6698) time: 0.2042 data: 0.1100 max mem: 9377 +Train: [72] [3100/6250] eta: 0:08:40 lr: 0.000024 grad: 0.1958 (0.2207) loss: 0.6610 (0.6701) time: 0.1660 data: 0.0675 max mem: 9377 +Train: [72] [3200/6250] eta: 0:08:23 lr: 0.000024 grad: 0.1953 (0.2203) loss: 0.6556 (0.6702) time: 0.1527 data: 0.0656 max mem: 9377 +Train: [72] [3300/6250] eta: 0:08:05 lr: 0.000024 grad: 0.1921 (0.2198) loss: 0.6681 (0.6703) time: 0.1648 data: 0.0750 max mem: 9377 +Train: [72] [3400/6250] eta: 0:07:48 lr: 0.000024 grad: 0.1944 (0.2196) loss: 0.6709 (0.6703) time: 0.1471 data: 0.0548 max mem: 9377 +Train: [72] [3500/6250] eta: 0:07:30 lr: 0.000024 grad: 0.1936 (0.2189) loss: 0.6847 (0.6704) time: 0.1363 data: 0.0448 max mem: 9377 +Train: [72] [3600/6250] eta: 0:07:13 lr: 0.000024 grad: 0.1869 (0.2184) loss: 0.6796 (0.6704) time: 0.1591 data: 0.0689 max mem: 9377 +Train: [72] [3700/6250] eta: 0:06:57 lr: 0.000024 grad: 0.1940 (0.2182) loss: 0.6706 (0.6704) time: 0.1617 data: 0.0818 max mem: 9377 +Train: [72] [3800/6250] eta: 0:06:40 lr: 0.000024 grad: 0.1928 (0.2181) loss: 0.6658 (0.6704) time: 0.1625 data: 0.0777 max mem: 9377 +Train: [72] [3900/6250] eta: 0:06:23 lr: 0.000024 grad: 0.1916 (0.2177) loss: 0.6587 (0.6704) time: 0.1549 data: 0.0816 max mem: 9377 +Train: [72] [4000/6250] eta: 0:06:06 lr: 0.000024 grad: 0.1912 (0.2175) loss: 0.6823 (0.6705) time: 0.1762 data: 0.0952 max mem: 9377 +Train: [72] [4100/6250] eta: 0:05:50 lr: 0.000024 grad: 0.1968 (0.2177) loss: 0.6588 (0.6705) time: 0.1871 data: 0.1034 max mem: 9377 +Train: [72] [4200/6250] eta: 0:05:33 lr: 0.000024 grad: 0.1975 (0.2172) loss: 0.6787 (0.6707) time: 0.1617 data: 0.0737 max mem: 9377 +Train: [72] [4300/6250] eta: 0:05:17 lr: 0.000024 grad: 0.1950 (0.2168) loss: 0.6862 (0.6708) time: 0.1840 data: 0.0948 max mem: 9377 +Train: [72] [4400/6250] eta: 0:05:02 lr: 0.000024 grad: 0.1937 (0.2166) loss: 0.6766 (0.6709) time: 0.1741 data: 0.0839 max mem: 9377 +Train: [72] [4500/6250] eta: 0:04:45 lr: 0.000024 grad: 0.2020 (0.2162) loss: 0.6679 (0.6710) time: 0.1805 data: 0.0921 max mem: 9377 +Train: [72] [4600/6250] eta: 0:04:29 lr: 0.000024 grad: 0.1829 (0.2160) loss: 0.6778 (0.6711) time: 0.1644 data: 0.0751 max mem: 9377 +Train: [72] [4700/6250] eta: 0:04:13 lr: 0.000024 grad: 0.1892 (0.2157) loss: 0.6867 (0.6713) time: 0.1921 data: 0.0929 max mem: 9377 +Train: [72] [4800/6250] eta: 0:03:57 lr: 0.000024 grad: 0.1920 (0.2153) loss: 0.6777 (0.6714) time: 0.1967 data: 0.0948 max mem: 9377 +Train: [72] [4900/6250] eta: 0:03:40 lr: 0.000024 grad: 0.1929 (0.2150) loss: 0.6782 (0.6715) time: 0.1648 data: 0.0711 max mem: 9377 +Train: [72] [5000/6250] eta: 0:03:24 lr: 0.000024 grad: 0.2003 (0.2148) loss: 0.6762 (0.6716) time: 0.1387 data: 0.0364 max mem: 9377 +Train: [72] [5100/6250] eta: 0:03:07 lr: 0.000024 grad: 0.1869 (0.2147) loss: 0.6575 (0.6716) time: 0.1449 data: 0.0538 max mem: 9377 +Train: [72] [5200/6250] eta: 0:02:51 lr: 0.000024 grad: 0.1969 (0.2145) loss: 0.6637 (0.6716) time: 0.1548 data: 0.0700 max mem: 9377 +Train: [72] [5300/6250] eta: 0:02:34 lr: 0.000024 grad: 0.1893 (0.2143) loss: 0.6859 (0.6716) time: 0.1624 data: 0.0743 max mem: 9377 +Train: [72] [5400/6250] eta: 0:02:18 lr: 0.000024 grad: 0.1896 (0.2143) loss: 0.6626 (0.6715) time: 0.1394 data: 0.0497 max mem: 9377 +Train: [72] [5500/6250] eta: 0:02:02 lr: 0.000023 grad: 0.1985 (0.2140) loss: 0.6721 (0.6715) time: 0.1526 data: 0.0618 max mem: 9377 +Train: [72] [5600/6250] eta: 0:01:45 lr: 0.000023 grad: 0.1887 (0.2138) loss: 0.6703 (0.6715) time: 0.1622 data: 0.0656 max mem: 9377 +Train: [72] [5700/6250] eta: 0:01:29 lr: 0.000023 grad: 0.2020 (0.2138) loss: 0.6709 (0.6714) time: 0.1635 data: 0.0738 max mem: 9377 +Train: [72] [5800/6250] eta: 0:01:13 lr: 0.000023 grad: 0.1936 (0.2137) loss: 0.6781 (0.6714) time: 0.1759 data: 0.0862 max mem: 9377 +Train: [72] [5900/6250] eta: 0:00:56 lr: 0.000023 grad: 0.1896 (0.2136) loss: 0.6727 (0.6713) time: 0.1232 data: 0.0313 max mem: 9377 +Train: [72] [6000/6250] eta: 0:00:40 lr: 0.000023 grad: 0.2042 (0.2135) loss: 0.6706 (0.6713) time: 0.1626 data: 0.0800 max mem: 9377 +Train: [72] [6100/6250] eta: 0:00:24 lr: 0.000023 grad: 0.1962 (0.2136) loss: 0.6628 (0.6713) time: 0.1554 data: 0.0677 max mem: 9377 +Train: [72] [6200/6250] eta: 0:00:08 lr: 0.000023 grad: 0.2115 (0.2134) loss: 0.6693 (0.6712) time: 0.1852 data: 0.1006 max mem: 9377 +Train: [72] [6249/6250] eta: 0:00:00 lr: 0.000023 grad: 0.1968 (0.2135) loss: 0.6688 (0.6712) time: 0.1643 data: 0.0777 max mem: 9377 +Train: [72] Total time: 0:16:59 (0.1631 s / it) +Averaged stats: lr: 0.000023 grad: 0.1968 (0.2135) loss: 0.6688 (0.6712) +Eval (hcp-train-subset): [72] [ 0/62] eta: 0:05:42 loss: 0.8972 (0.8972) time: 5.5240 data: 5.4925 max mem: 9377 +Eval (hcp-train-subset): [72] [61/62] eta: 0:00:00 loss: 0.9046 (0.9041) time: 0.1595 data: 0.1343 max mem: 9377 +Eval (hcp-train-subset): [72] Total time: 0:00:15 (0.2427 s / it) +Averaged stats (hcp-train-subset): loss: 0.9046 (0.9041) +Eval (hcp-val): [72] [ 0/62] eta: 0:05:18 loss: 0.9125 (0.9125) time: 5.1336 data: 5.0989 max mem: 9377 +Eval (hcp-val): [72] [61/62] eta: 0:00:00 loss: 0.9033 (0.9047) time: 0.1379 data: 0.1128 max mem: 9377 +Eval (hcp-val): [72] Total time: 0:00:15 (0.2501 s / it) +Averaged stats (hcp-val): loss: 0.9033 (0.9047) +Eval (nsd-val): [72] [ 0/62] eta: 0:06:10 loss: 0.9032 (0.9032) time: 5.9737 data: 5.9434 max mem: 9377 +Eval (nsd-val): [72] [61/62] eta: 0:00:00 loss: 0.9116 (0.9134) time: 0.1360 data: 0.1105 max mem: 9377 +Eval (nsd-val): [72] Total time: 0:00:15 (0.2435 s / it) +Averaged stats (nsd-val): loss: 0.9116 (0.9134) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [73] [ 0/6250] eta: 12:09:18 lr: 0.000023 grad: 0.2760 (0.2760) loss: 0.7079 (0.7079) time: 7.0013 data: 6.8969 max mem: 9377 +Train: [73] [ 100/6250] eta: 0:23:36 lr: 0.000023 grad: 0.2797 (0.3148) loss: 0.6598 (0.6910) time: 0.1387 data: 0.0266 max mem: 9377 +Train: [73] [ 200/6250] eta: 0:20:31 lr: 0.000023 grad: 0.2934 (0.3243) loss: 0.6505 (0.6821) time: 0.1835 data: 0.0882 max mem: 9377 +Train: [73] [ 300/6250] eta: 0:19:18 lr: 0.000023 grad: 0.2438 (0.3043) loss: 0.6716 (0.6796) time: 0.1666 data: 0.0747 max mem: 9377 +Train: [73] [ 400/6250] eta: 0:18:31 lr: 0.000023 grad: 0.2399 (0.2912) loss: 0.6382 (0.6755) time: 0.1900 data: 0.0905 max mem: 9377 +Train: [73] [ 500/6250] eta: 0:17:47 lr: 0.000023 grad: 0.2114 (0.2795) loss: 0.6700 (0.6731) time: 0.1708 data: 0.0709 max mem: 9377 +Train: [73] [ 600/6250] eta: 0:17:07 lr: 0.000023 grad: 0.2018 (0.2686) loss: 0.6665 (0.6726) time: 0.1490 data: 0.0518 max mem: 9377 +Train: [73] [ 700/6250] eta: 0:16:32 lr: 0.000023 grad: 0.2081 (0.2615) loss: 0.6755 (0.6724) time: 0.1381 data: 0.0365 max mem: 9377 +Train: [73] [ 800/6250] eta: 0:16:04 lr: 0.000023 grad: 0.2090 (0.2548) loss: 0.6556 (0.6721) time: 0.1311 data: 0.0323 max mem: 9377 +Train: [73] [ 900/6250] eta: 0:15:41 lr: 0.000023 grad: 0.1983 (0.2497) loss: 0.6754 (0.6721) time: 0.1707 data: 0.0781 max mem: 9377 +Train: [73] [1000/6250] eta: 0:15:23 lr: 0.000023 grad: 0.1933 (0.2453) loss: 0.6659 (0.6723) time: 0.2016 data: 0.1185 max mem: 9377 +Train: [73] [1100/6250] eta: 0:14:59 lr: 0.000023 grad: 0.1934 (0.2414) loss: 0.6739 (0.6723) time: 0.1714 data: 0.0888 max mem: 9377 +Train: [73] [1200/6250] eta: 0:14:34 lr: 0.000023 grad: 0.1927 (0.2383) loss: 0.6698 (0.6723) time: 0.1719 data: 0.0813 max mem: 9377 +Train: [73] [1300/6250] eta: 0:14:14 lr: 0.000023 grad: 0.1961 (0.2356) loss: 0.6649 (0.6720) time: 0.1667 data: 0.0787 max mem: 9377 +Train: [73] [1400/6250] eta: 0:13:52 lr: 0.000023 grad: 0.1950 (0.2337) loss: 0.6840 (0.6721) time: 0.1610 data: 0.0677 max mem: 9377 +Train: [73] [1500/6250] eta: 0:13:29 lr: 0.000023 grad: 0.1934 (0.2315) loss: 0.6743 (0.6719) time: 0.1519 data: 0.0701 max mem: 9377 +Train: [73] [1600/6250] eta: 0:13:06 lr: 0.000023 grad: 0.1941 (0.2303) loss: 0.6638 (0.6718) time: 0.1509 data: 0.0625 max mem: 9377 +Train: [73] [1700/6250] eta: 0:12:43 lr: 0.000023 grad: 0.1947 (0.2290) loss: 0.6744 (0.6719) time: 0.1443 data: 0.0508 max mem: 9377 +Train: [73] [1800/6250] eta: 0:12:21 lr: 0.000023 grad: 0.1958 (0.2278) loss: 0.6613 (0.6716) time: 0.1453 data: 0.0542 max mem: 9377 +Train: [73] [1900/6250] eta: 0:12:01 lr: 0.000023 grad: 0.1903 (0.2270) loss: 0.6735 (0.6715) time: 0.1612 data: 0.0697 max mem: 9377 +Train: [73] [2000/6250] eta: 0:11:41 lr: 0.000023 grad: 0.1941 (0.2265) loss: 0.6623 (0.6713) time: 0.1624 data: 0.0726 max mem: 9377 +Train: [73] [2100/6250] eta: 0:11:20 lr: 0.000023 grad: 0.1917 (0.2258) loss: 0.6575 (0.6710) time: 0.1501 data: 0.0521 max mem: 9377 +Train: [73] [2200/6250] eta: 0:11:01 lr: 0.000023 grad: 0.1997 (0.2254) loss: 0.6595 (0.6705) time: 0.1441 data: 0.0549 max mem: 9377 +Train: [73] [2300/6250] eta: 0:10:43 lr: 0.000023 grad: 0.1913 (0.2244) loss: 0.6495 (0.6703) time: 0.1466 data: 0.0503 max mem: 9377 +Train: [73] [2400/6250] eta: 0:10:31 lr: 0.000023 grad: 0.1913 (0.2240) loss: 0.6672 (0.6698) time: 0.2816 data: 0.2067 max mem: 9377 +Train: [73] [2500/6250] eta: 0:10:18 lr: 0.000023 grad: 0.2019 (0.2238) loss: 0.6462 (0.6693) time: 0.2140 data: 0.1335 max mem: 9377 +Train: [73] [2600/6250] eta: 0:10:04 lr: 0.000023 grad: 0.1953 (0.2234) loss: 0.6604 (0.6689) time: 0.1719 data: 0.0924 max mem: 9377 +Train: [73] [2700/6250] eta: 0:09:49 lr: 0.000023 grad: 0.1984 (0.2230) loss: 0.6546 (0.6686) time: 0.1834 data: 0.1001 max mem: 9377 +Train: [73] [2800/6250] eta: 0:09:34 lr: 0.000023 grad: 0.1934 (0.2227) loss: 0.6469 (0.6682) time: 0.1587 data: 0.0724 max mem: 9377 +Train: [73] [2900/6250] eta: 0:09:19 lr: 0.000023 grad: 0.1933 (0.2224) loss: 0.6551 (0.6680) time: 0.1811 data: 0.0823 max mem: 9377 +Train: [73] [3000/6250] eta: 0:09:03 lr: 0.000023 grad: 0.1987 (0.2220) loss: 0.6667 (0.6677) time: 0.1730 data: 0.0738 max mem: 9377 +Train: [73] [3100/6250] eta: 0:08:46 lr: 0.000023 grad: 0.1978 (0.2218) loss: 0.6684 (0.6675) time: 0.1159 data: 0.0170 max mem: 9377 +Train: [73] [3200/6250] eta: 0:08:28 lr: 0.000022 grad: 0.1906 (0.2213) loss: 0.6666 (0.6674) time: 0.1597 data: 0.0639 max mem: 9377 +Train: [73] [3300/6250] eta: 0:08:10 lr: 0.000022 grad: 0.1965 (0.2206) loss: 0.6568 (0.6672) time: 0.1547 data: 0.0528 max mem: 9377 +Train: [73] [3400/6250] eta: 0:07:53 lr: 0.000022 grad: 0.1948 (0.2204) loss: 0.6745 (0.6671) time: 0.1556 data: 0.0715 max mem: 9377 +Train: [73] [3500/6250] eta: 0:07:36 lr: 0.000022 grad: 0.1969 (0.2200) loss: 0.6747 (0.6670) time: 0.1654 data: 0.0736 max mem: 9377 +Train: [73] [3600/6250] eta: 0:07:18 lr: 0.000022 grad: 0.2031 (0.2201) loss: 0.6614 (0.6670) time: 0.1565 data: 0.0634 max mem: 9377 +Train: [73] [3700/6250] eta: 0:07:00 lr: 0.000022 grad: 0.1919 (0.2199) loss: 0.6664 (0.6669) time: 0.1418 data: 0.0483 max mem: 9377 +Train: [73] [3800/6250] eta: 0:06:44 lr: 0.000022 grad: 0.1915 (0.2197) loss: 0.6740 (0.6669) time: 0.1528 data: 0.0658 max mem: 9377 +Train: [73] [3900/6250] eta: 0:06:26 lr: 0.000022 grad: 0.1968 (0.2195) loss: 0.6855 (0.6671) time: 0.1514 data: 0.0567 max mem: 9377 +Train: [73] [4000/6250] eta: 0:06:10 lr: 0.000022 grad: 0.1974 (0.2192) loss: 0.6683 (0.6672) time: 0.1808 data: 0.0901 max mem: 9377 +Train: [73] [4100/6250] eta: 0:05:53 lr: 0.000022 grad: 0.1922 (0.2190) loss: 0.6603 (0.6672) time: 0.1500 data: 0.0687 max mem: 9377 +Train: [73] [4200/6250] eta: 0:05:38 lr: 0.000022 grad: 0.1905 (0.2188) loss: 0.6595 (0.6672) time: 0.2137 data: 0.1381 max mem: 9377 +Train: [73] [4300/6250] eta: 0:05:22 lr: 0.000022 grad: 0.1921 (0.2187) loss: 0.6775 (0.6672) time: 0.1680 data: 0.0865 max mem: 9377 +Train: [73] [4400/6250] eta: 0:05:05 lr: 0.000022 grad: 0.1937 (0.2186) loss: 0.6695 (0.6672) time: 0.1703 data: 0.0907 max mem: 9377 +Train: [73] [4500/6250] eta: 0:04:49 lr: 0.000022 grad: 0.1980 (0.2185) loss: 0.6519 (0.6671) time: 0.1733 data: 0.0850 max mem: 9377 +Train: [73] [4600/6250] eta: 0:04:33 lr: 0.000022 grad: 0.1946 (0.2187) loss: 0.6643 (0.6671) time: 0.1436 data: 0.0487 max mem: 9377 +Train: [73] [4700/6250] eta: 0:04:17 lr: 0.000022 grad: 0.1951 (0.2184) loss: 0.6753 (0.6670) time: 0.1798 data: 0.0853 max mem: 9377 +Train: [73] [4800/6250] eta: 0:04:01 lr: 0.000022 grad: 0.1916 (0.2183) loss: 0.6640 (0.6671) time: 0.1760 data: 0.0746 max mem: 9377 +Train: [73] [4900/6250] eta: 0:03:44 lr: 0.000022 grad: 0.2000 (0.2180) loss: 0.6596 (0.6669) time: 0.1802 data: 0.0844 max mem: 9377 +Train: [73] [5000/6250] eta: 0:03:28 lr: 0.000022 grad: 0.1919 (0.2177) loss: 0.6564 (0.6670) time: 0.1611 data: 0.0611 max mem: 9377 +Train: [73] [5100/6250] eta: 0:03:11 lr: 0.000022 grad: 0.1996 (0.2179) loss: 0.6631 (0.6670) time: 0.1539 data: 0.0598 max mem: 9377 +Train: [73] [5200/6250] eta: 0:02:54 lr: 0.000022 grad: 0.1933 (0.2177) loss: 0.6577 (0.6669) time: 0.1326 data: 0.0410 max mem: 9377 +Train: [73] [5300/6250] eta: 0:02:37 lr: 0.000022 grad: 0.1903 (0.2177) loss: 0.6520 (0.6669) time: 0.1668 data: 0.0738 max mem: 9377 +Train: [73] [5400/6250] eta: 0:02:21 lr: 0.000022 grad: 0.2052 (0.2177) loss: 0.6625 (0.6668) time: 0.1503 data: 0.0565 max mem: 9377 +Train: [73] [5500/6250] eta: 0:02:04 lr: 0.000022 grad: 0.1958 (0.2177) loss: 0.6704 (0.6669) time: 0.1463 data: 0.0416 max mem: 9377 +Train: [73] [5600/6250] eta: 0:01:47 lr: 0.000022 grad: 0.2014 (0.2179) loss: 0.6658 (0.6668) time: 0.1642 data: 0.0792 max mem: 9377 +Train: [73] [5700/6250] eta: 0:01:31 lr: 0.000022 grad: 0.1995 (0.2180) loss: 0.6702 (0.6668) time: 0.1653 data: 0.0752 max mem: 9377 +Train: [73] [5800/6250] eta: 0:01:14 lr: 0.000022 grad: 0.1982 (0.2180) loss: 0.6632 (0.6668) time: 0.1474 data: 0.0600 max mem: 9377 +Train: [73] [5900/6250] eta: 0:00:57 lr: 0.000022 grad: 0.1965 (0.2177) loss: 0.6687 (0.6669) time: 0.1689 data: 0.0791 max mem: 9377 +Train: [73] [6000/6250] eta: 0:00:41 lr: 0.000022 grad: 0.2003 (0.2177) loss: 0.6611 (0.6669) time: 0.1597 data: 0.0809 max mem: 9377 +Train: [73] [6100/6250] eta: 0:00:24 lr: 0.000022 grad: 0.1927 (0.2176) loss: 0.6738 (0.6671) time: 0.1522 data: 0.0620 max mem: 9377 +Train: [73] [6200/6250] eta: 0:00:08 lr: 0.000022 grad: 0.1914 (0.2175) loss: 0.6729 (0.6672) time: 0.1576 data: 0.0629 max mem: 9377 +Train: [73] [6249/6250] eta: 0:00:00 lr: 0.000022 grad: 0.2034 (0.2177) loss: 0.6698 (0.6672) time: 0.1518 data: 0.0607 max mem: 9377 +Train: [73] Total time: 0:17:16 (0.1658 s / it) +Averaged stats: lr: 0.000022 grad: 0.2034 (0.2177) loss: 0.6698 (0.6672) +Eval (hcp-train-subset): [73] [ 0/62] eta: 0:06:01 loss: 0.8919 (0.8919) time: 5.8371 data: 5.8054 max mem: 9377 +Eval (hcp-train-subset): [73] [61/62] eta: 0:00:00 loss: 0.9082 (0.9070) time: 0.1293 data: 0.1042 max mem: 9377 +Eval (hcp-train-subset): [73] Total time: 0:00:15 (0.2465 s / it) +Averaged stats (hcp-train-subset): loss: 0.9082 (0.9070) +Eval (hcp-val): [73] [ 0/62] eta: 0:06:37 loss: 0.9054 (0.9054) time: 6.4054 data: 6.3752 max mem: 9377 +Eval (hcp-val): [73] [61/62] eta: 0:00:00 loss: 0.9052 (0.9075) time: 0.1294 data: 0.1028 max mem: 9377 +Eval (hcp-val): [73] Total time: 0:00:14 (0.2375 s / it) +Averaged stats (hcp-val): loss: 0.9052 (0.9075) +Eval (nsd-val): [73] [ 0/62] eta: 0:05:35 loss: 0.9105 (0.9105) time: 5.4118 data: 5.3816 max mem: 9377 +Eval (nsd-val): [73] [61/62] eta: 0:00:00 loss: 0.9088 (0.9132) time: 0.1432 data: 0.1180 max mem: 9377 +Eval (nsd-val): [73] Total time: 0:00:14 (0.2377 s / it) +Averaged stats (nsd-val): loss: 0.9088 (0.9132) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [74] [ 0/6250] eta: 8:04:42 lr: 0.000022 grad: 0.2798 (0.2798) loss: 0.7062 (0.7062) time: 4.6532 data: 4.3686 max mem: 9377 +Train: [74] [ 100/6250] eta: 0:23:42 lr: 0.000022 grad: 0.3354 (0.3586) loss: 0.6636 (0.6706) time: 0.1863 data: 0.0860 max mem: 9377 +Train: [74] [ 200/6250] eta: 0:20:47 lr: 0.000022 grad: 0.2838 (0.3505) loss: 0.6465 (0.6623) time: 0.1982 data: 0.0966 max mem: 9377 +Train: [74] [ 300/6250] eta: 0:19:36 lr: 0.000022 grad: 0.2707 (0.3319) loss: 0.6385 (0.6601) time: 0.2099 data: 0.1147 max mem: 9377 +Train: [74] [ 400/6250] eta: 0:18:30 lr: 0.000022 grad: 0.2372 (0.3142) loss: 0.6502 (0.6596) time: 0.1648 data: 0.0425 max mem: 9377 +Train: [74] [ 500/6250] eta: 0:17:50 lr: 0.000022 grad: 0.2279 (0.3018) loss: 0.6405 (0.6582) time: 0.1815 data: 0.0838 max mem: 9377 +Train: [74] [ 600/6250] eta: 0:17:11 lr: 0.000022 grad: 0.2274 (0.2903) loss: 0.6377 (0.6564) time: 0.1569 data: 0.0548 max mem: 9377 +Train: [74] [ 700/6250] eta: 0:16:31 lr: 0.000022 grad: 0.2125 (0.2863) loss: 0.6554 (0.6555) time: 0.1471 data: 0.0506 max mem: 9377 +Train: [74] [ 800/6250] eta: 0:15:57 lr: 0.000022 grad: 0.2025 (0.2782) loss: 0.6616 (0.6557) time: 0.1446 data: 0.0472 max mem: 9377 +Train: [74] [ 900/6250] eta: 0:15:34 lr: 0.000021 grad: 0.2018 (0.2714) loss: 0.6575 (0.6567) time: 0.1922 data: 0.1098 max mem: 9377 +Train: [74] [1000/6250] eta: 0:15:09 lr: 0.000021 grad: 0.1985 (0.2670) loss: 0.6529 (0.6576) time: 0.1613 data: 0.0762 max mem: 9377 +Train: [74] [1100/6250] eta: 0:14:42 lr: 0.000021 grad: 0.2040 (0.2624) loss: 0.6610 (0.6584) time: 0.1572 data: 0.0645 max mem: 9377 +Train: [74] [1200/6250] eta: 0:14:17 lr: 0.000021 grad: 0.1993 (0.2586) loss: 0.6577 (0.6587) time: 0.1643 data: 0.0851 max mem: 9377 +Train: [74] [1300/6250] eta: 0:13:59 lr: 0.000021 grad: 0.1986 (0.2552) loss: 0.6630 (0.6595) time: 0.1675 data: 0.0754 max mem: 9377 +Train: [74] [1400/6250] eta: 0:13:38 lr: 0.000021 grad: 0.2053 (0.2519) loss: 0.6524 (0.6600) time: 0.1416 data: 0.0412 max mem: 9377 +Train: [74] [1500/6250] eta: 0:13:17 lr: 0.000021 grad: 0.1906 (0.2502) loss: 0.6557 (0.6606) time: 0.1276 data: 0.0327 max mem: 9377 +Train: [74] [1600/6250] eta: 0:12:55 lr: 0.000021 grad: 0.1968 (0.2483) loss: 0.6580 (0.6609) time: 0.1593 data: 0.0563 max mem: 9377 +Train: [74] [1700/6250] eta: 0:12:37 lr: 0.000021 grad: 0.1901 (0.2470) loss: 0.6693 (0.6616) time: 0.1504 data: 0.0498 max mem: 9377 +Train: [74] [1800/6250] eta: 0:12:19 lr: 0.000021 grad: 0.1962 (0.2459) loss: 0.6764 (0.6618) time: 0.1546 data: 0.0621 max mem: 9377 +Train: [74] [1900/6250] eta: 0:12:01 lr: 0.000021 grad: 0.1923 (0.2442) loss: 0.6654 (0.6622) time: 0.1565 data: 0.0659 max mem: 9377 +Train: [74] [2000/6250] eta: 0:11:44 lr: 0.000021 grad: 0.1904 (0.2430) loss: 0.6727 (0.6625) time: 0.1445 data: 0.0527 max mem: 9377 +Train: [74] [2100/6250] eta: 0:11:27 lr: 0.000021 grad: 0.2083 (0.2416) loss: 0.6612 (0.6627) time: 0.1735 data: 0.0883 max mem: 9377 +Train: [74] [2200/6250] eta: 0:11:09 lr: 0.000021 grad: 0.1976 (0.2407) loss: 0.6401 (0.6627) time: 0.1532 data: 0.0655 max mem: 9377 +Train: [74] [2300/6250] eta: 0:10:51 lr: 0.000021 grad: 0.1964 (0.2400) loss: 0.6668 (0.6628) time: 0.1688 data: 0.0849 max mem: 9377 +Train: [74] [2400/6250] eta: 0:10:33 lr: 0.000021 grad: 0.1999 (0.2394) loss: 0.6610 (0.6627) time: 0.1537 data: 0.0655 max mem: 9377 +Train: [74] [2500/6250] eta: 0:10:18 lr: 0.000021 grad: 0.1925 (0.2387) loss: 0.6676 (0.6626) time: 0.1455 data: 0.0599 max mem: 9377 +Train: [74] [2600/6250] eta: 0:10:01 lr: 0.000021 grad: 0.1999 (0.2382) loss: 0.6563 (0.6626) time: 0.1763 data: 0.0855 max mem: 9377 +Train: [74] [2700/6250] eta: 0:09:45 lr: 0.000021 grad: 0.1946 (0.2369) loss: 0.6635 (0.6628) time: 0.1641 data: 0.0798 max mem: 9377 +Train: [74] [2800/6250] eta: 0:09:28 lr: 0.000021 grad: 0.1895 (0.2362) loss: 0.6654 (0.6631) time: 0.1655 data: 0.0803 max mem: 9377 +Train: [74] [2900/6250] eta: 0:09:13 lr: 0.000021 grad: 0.1953 (0.2358) loss: 0.6757 (0.6633) time: 0.2056 data: 0.1127 max mem: 9377 +Train: [74] [3000/6250] eta: 0:08:55 lr: 0.000021 grad: 0.2015 (0.2349) loss: 0.6499 (0.6634) time: 0.1561 data: 0.0596 max mem: 9377 +Train: [74] [3100/6250] eta: 0:08:38 lr: 0.000021 grad: 0.1981 (0.2341) loss: 0.6767 (0.6636) time: 0.1276 data: 0.0273 max mem: 9377 +Train: [74] [3200/6250] eta: 0:08:21 lr: 0.000021 grad: 0.1953 (0.2333) loss: 0.6807 (0.6640) time: 0.1529 data: 0.0678 max mem: 9377 +Train: [74] [3300/6250] eta: 0:08:03 lr: 0.000021 grad: 0.1874 (0.2326) loss: 0.6780 (0.6643) time: 0.1533 data: 0.0626 max mem: 9377 +Train: [74] [3400/6250] eta: 0:07:47 lr: 0.000021 grad: 0.1939 (0.2319) loss: 0.6823 (0.6645) time: 0.1087 data: 0.0108 max mem: 9377 +Train: [74] [3500/6250] eta: 0:07:29 lr: 0.000021 grad: 0.1913 (0.2312) loss: 0.6659 (0.6646) time: 0.1473 data: 0.0566 max mem: 9377 +Train: [74] [3600/6250] eta: 0:07:12 lr: 0.000021 grad: 0.1971 (0.2307) loss: 0.6524 (0.6644) time: 0.1518 data: 0.0588 max mem: 9377 +Train: [74] [3700/6250] eta: 0:06:55 lr: 0.000021 grad: 0.1997 (0.2303) loss: 0.6394 (0.6641) time: 0.1528 data: 0.0688 max mem: 9377 +Train: [74] [3800/6250] eta: 0:06:38 lr: 0.000021 grad: 0.1962 (0.2298) loss: 0.6588 (0.6639) time: 0.1650 data: 0.0778 max mem: 9377 +Train: [74] [3900/6250] eta: 0:06:22 lr: 0.000021 grad: 0.1946 (0.2296) loss: 0.6587 (0.6638) time: 0.1632 data: 0.0779 max mem: 9377 +Train: [74] [4000/6250] eta: 0:06:05 lr: 0.000021 grad: 0.1925 (0.2292) loss: 0.6607 (0.6637) time: 0.1537 data: 0.0697 max mem: 9377 +Train: [74] [4100/6250] eta: 0:05:48 lr: 0.000021 grad: 0.1980 (0.2288) loss: 0.6553 (0.6635) time: 0.1542 data: 0.0552 max mem: 9377 +Train: [74] [4200/6250] eta: 0:05:32 lr: 0.000021 grad: 0.1977 (0.2287) loss: 0.6609 (0.6633) time: 0.1968 data: 0.1181 max mem: 9377 +Train: [74] [4300/6250] eta: 0:05:16 lr: 0.000021 grad: 0.1956 (0.2283) loss: 0.6581 (0.6631) time: 0.1844 data: 0.0931 max mem: 9377 +Train: [74] [4400/6250] eta: 0:05:00 lr: 0.000021 grad: 0.1912 (0.2282) loss: 0.6586 (0.6630) time: 0.1719 data: 0.0875 max mem: 9377 +Train: [74] [4500/6250] eta: 0:04:43 lr: 0.000021 grad: 0.1936 (0.2279) loss: 0.6618 (0.6630) time: 0.1610 data: 0.0723 max mem: 9377 +Train: [74] [4600/6250] eta: 0:04:28 lr: 0.000021 grad: 0.1887 (0.2273) loss: 0.6546 (0.6629) time: 0.1751 data: 0.0919 max mem: 9377 +Train: [74] [4700/6250] eta: 0:04:12 lr: 0.000021 grad: 0.1880 (0.2270) loss: 0.6594 (0.6629) time: 0.1665 data: 0.0754 max mem: 9377 +Train: [74] [4800/6250] eta: 0:03:56 lr: 0.000021 grad: 0.1917 (0.2268) loss: 0.6657 (0.6629) time: 0.1814 data: 0.0874 max mem: 9377 +Train: [74] [4900/6250] eta: 0:03:39 lr: 0.000020 grad: 0.1949 (0.2264) loss: 0.6570 (0.6628) time: 0.1654 data: 0.0735 max mem: 9377 +Train: [74] [5000/6250] eta: 0:03:23 lr: 0.000020 grad: 0.1883 (0.2260) loss: 0.6690 (0.6627) time: 0.1542 data: 0.0531 max mem: 9377 +Train: [74] [5100/6250] eta: 0:03:06 lr: 0.000020 grad: 0.1972 (0.2260) loss: 0.6633 (0.6627) time: 0.1432 data: 0.0453 max mem: 9377 +Train: [74] [5200/6250] eta: 0:02:50 lr: 0.000020 grad: 0.2049 (0.2260) loss: 0.6627 (0.6627) time: 0.1674 data: 0.0779 max mem: 9377 +Train: [74] [5300/6250] eta: 0:02:34 lr: 0.000020 grad: 0.1959 (0.2259) loss: 0.6767 (0.6628) time: 0.1804 data: 0.0886 max mem: 9377 +Train: [74] [5400/6250] eta: 0:02:17 lr: 0.000020 grad: 0.1898 (0.2259) loss: 0.6596 (0.6629) time: 0.1606 data: 0.0655 max mem: 9377 +Train: [74] [5500/6250] eta: 0:02:01 lr: 0.000020 grad: 0.2000 (0.2259) loss: 0.6577 (0.6629) time: 0.1658 data: 0.0875 max mem: 9377 +Train: [74] [5600/6250] eta: 0:01:45 lr: 0.000020 grad: 0.1976 (0.2259) loss: 0.6582 (0.6629) time: 0.1524 data: 0.0583 max mem: 9377 +Train: [74] [5700/6250] eta: 0:01:28 lr: 0.000020 grad: 0.2027 (0.2259) loss: 0.6644 (0.6629) time: 0.1439 data: 0.0532 max mem: 9377 +Train: [74] [5800/6250] eta: 0:01:12 lr: 0.000020 grad: 0.1964 (0.2258) loss: 0.6651 (0.6629) time: 0.1425 data: 0.0610 max mem: 9377 +Train: [74] [5900/6250] eta: 0:00:56 lr: 0.000020 grad: 0.1966 (0.2257) loss: 0.6698 (0.6629) time: 0.1457 data: 0.0607 max mem: 9377 +Train: [74] [6000/6250] eta: 0:00:40 lr: 0.000020 grad: 0.1923 (0.2255) loss: 0.6566 (0.6630) time: 0.1432 data: 0.0555 max mem: 9377 +Train: [74] [6100/6250] eta: 0:00:24 lr: 0.000020 grad: 0.1953 (0.2253) loss: 0.6622 (0.6630) time: 0.1434 data: 0.0523 max mem: 9377 +Train: [74] [6200/6250] eta: 0:00:08 lr: 0.000020 grad: 0.2011 (0.2252) loss: 0.6552 (0.6630) time: 0.1637 data: 0.0696 max mem: 9377 +Train: [74] [6249/6250] eta: 0:00:00 lr: 0.000020 grad: 0.1961 (0.2252) loss: 0.6754 (0.6630) time: 0.1599 data: 0.0752 max mem: 9377 +Train: [74] Total time: 0:16:53 (0.1621 s / it) +Averaged stats: lr: 0.000020 grad: 0.1961 (0.2252) loss: 0.6754 (0.6630) +Eval (hcp-train-subset): [74] [ 0/62] eta: 0:05:06 loss: 0.8944 (0.8944) time: 4.9453 data: 4.9101 max mem: 9377 +Eval (hcp-train-subset): [74] [61/62] eta: 0:00:00 loss: 0.9037 (0.9062) time: 0.1428 data: 0.1175 max mem: 9377 +Eval (hcp-train-subset): [74] Total time: 0:00:14 (0.2413 s / it) +Averaged stats (hcp-train-subset): loss: 0.9037 (0.9062) +Making plots (hcp-train-subset): example=19 +Eval (hcp-val): [74] [ 0/62] eta: 0:06:05 loss: 0.9157 (0.9157) time: 5.8876 data: 5.8518 max mem: 9377 +Eval (hcp-val): [74] [61/62] eta: 0:00:00 loss: 0.9062 (0.9066) time: 0.1401 data: 0.1141 max mem: 9377 +Eval (hcp-val): [74] Total time: 0:00:15 (0.2433 s / it) +Averaged stats (hcp-val): loss: 0.9062 (0.9066) +Making plots (hcp-val): example=5 +Eval (nsd-val): [74] [ 0/62] eta: 0:05:43 loss: 0.8993 (0.8993) time: 5.5441 data: 5.5027 max mem: 9377 +Eval (nsd-val): [74] [61/62] eta: 0:00:00 loss: 0.9053 (0.9079) time: 0.1387 data: 0.1119 max mem: 9377 +Eval (nsd-val): [74] Total time: 0:00:14 (0.2407 s / it) +Averaged stats (nsd-val): loss: 0.9053 (0.9079) +Making plots (nsd-val): example=52 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-00074.pth +Train: [75] [ 0/6250] eta: 10:22:14 lr: 0.000020 grad: 0.2791 (0.2791) loss: 0.6220 (0.6220) time: 5.9736 data: 5.7018 max mem: 9377 +Train: [75] [ 100/6250] eta: 0:23:04 lr: 0.000020 grad: 0.2765 (0.3106) loss: 0.7172 (0.7045) time: 0.1525 data: 0.0445 max mem: 9377 +Train: [75] [ 200/6250] eta: 0:19:31 lr: 0.000020 grad: 0.3103 (0.3063) loss: 0.6865 (0.6940) time: 0.1601 data: 0.0497 max mem: 9377 +Train: [75] [ 300/6250] eta: 0:18:07 lr: 0.000020 grad: 0.2576 (0.3036) loss: 0.6510 (0.6887) time: 0.1491 data: 0.0516 max mem: 9377 +Train: [75] [ 400/6250] eta: 0:17:32 lr: 0.000020 grad: 0.2363 (0.2932) loss: 0.6474 (0.6834) time: 0.1591 data: 0.0599 max mem: 9377 +Train: [75] [ 500/6250] eta: 0:16:45 lr: 0.000020 grad: 0.2165 (0.2847) loss: 0.6676 (0.6796) time: 0.1612 data: 0.0661 max mem: 9377 +Train: [75] [ 600/6250] eta: 0:16:02 lr: 0.000020 grad: 0.2115 (0.2764) loss: 0.6645 (0.6755) time: 0.1394 data: 0.0393 max mem: 9377 +Train: [75] [ 700/6250] eta: 0:15:43 lr: 0.000020 grad: 0.2079 (0.2677) loss: 0.6719 (0.6736) time: 0.1524 data: 0.0611 max mem: 9377 +Train: [75] [ 800/6250] eta: 0:15:25 lr: 0.000020 grad: 0.2095 (0.2620) loss: 0.6568 (0.6722) time: 0.1722 data: 0.0841 max mem: 9377 +Train: [75] [ 900/6250] eta: 0:15:10 lr: 0.000020 grad: 0.1962 (0.2565) loss: 0.6751 (0.6711) time: 0.1959 data: 0.0890 max mem: 9377 +Train: [75] [1000/6250] eta: 0:14:56 lr: 0.000020 grad: 0.1965 (0.2520) loss: 0.6715 (0.6708) time: 0.1678 data: 0.0799 max mem: 9377 +Train: [75] [1100/6250] eta: 0:14:38 lr: 0.000020 grad: 0.1984 (0.2480) loss: 0.6535 (0.6703) time: 0.1504 data: 0.0641 max mem: 9377 +Train: [75] [1200/6250] eta: 0:14:22 lr: 0.000020 grad: 0.2041 (0.2455) loss: 0.6624 (0.6700) time: 0.1961 data: 0.1064 max mem: 9377 +Train: [75] [1300/6250] eta: 0:14:11 lr: 0.000020 grad: 0.1938 (0.2422) loss: 0.6633 (0.6700) time: 0.1675 data: 0.0618 max mem: 9377 +Train: [75] [1400/6250] eta: 0:13:52 lr: 0.000020 grad: 0.1931 (0.2404) loss: 0.6645 (0.6700) time: 0.1509 data: 0.0556 max mem: 9377 +Train: [75] [1500/6250] eta: 0:13:32 lr: 0.000020 grad: 0.1997 (0.2387) loss: 0.6589 (0.6697) time: 0.1629 data: 0.0670 max mem: 9377 +Train: [75] [1600/6250] eta: 0:13:10 lr: 0.000020 grad: 0.1993 (0.2380) loss: 0.6521 (0.6692) time: 0.1594 data: 0.0665 max mem: 9377 +Train: [75] [1700/6250] eta: 0:12:49 lr: 0.000020 grad: 0.1958 (0.2364) loss: 0.6587 (0.6689) time: 0.1691 data: 0.0853 max mem: 9377 +Train: [75] [1800/6250] eta: 0:12:28 lr: 0.000020 grad: 0.2030 (0.2359) loss: 0.6627 (0.6684) time: 0.1469 data: 0.0549 max mem: 9377 +Train: [75] [1900/6250] eta: 0:12:09 lr: 0.000020 grad: 0.1933 (0.2347) loss: 0.6739 (0.6680) time: 0.1496 data: 0.0647 max mem: 9377 +Train: [75] [2000/6250] eta: 0:11:49 lr: 0.000020 grad: 0.1909 (0.2338) loss: 0.6642 (0.6676) time: 0.1546 data: 0.0806 max mem: 9377 +Train: [75] [2100/6250] eta: 0:11:31 lr: 0.000020 grad: 0.1951 (0.2336) loss: 0.6662 (0.6675) time: 0.1573 data: 0.0754 max mem: 9377 +Train: [75] [2200/6250] eta: 0:11:15 lr: 0.000020 grad: 0.2026 (0.2329) loss: 0.6550 (0.6673) time: 0.1597 data: 0.0627 max mem: 9377 +Train: [75] [2300/6250] eta: 0:10:58 lr: 0.000020 grad: 0.2007 (0.2317) loss: 0.6773 (0.6673) time: 0.1537 data: 0.0632 max mem: 9377 +Train: [75] [2400/6250] eta: 0:10:46 lr: 0.000020 grad: 0.1955 (0.2310) loss: 0.6706 (0.6672) time: 0.1730 data: 0.0842 max mem: 9377 +Train: [75] [2500/6250] eta: 0:10:33 lr: 0.000020 grad: 0.1990 (0.2303) loss: 0.6643 (0.6672) time: 0.2262 data: 0.1343 max mem: 9377 +Train: [75] [2600/6250] eta: 0:10:17 lr: 0.000020 grad: 0.1925 (0.2297) loss: 0.6683 (0.6671) time: 0.1862 data: 0.1020 max mem: 9377 +Train: [75] [2700/6250] eta: 0:10:02 lr: 0.000020 grad: 0.1910 (0.2292) loss: 0.6677 (0.6671) time: 0.1831 data: 0.0977 max mem: 9377 +Train: [75] [2800/6250] eta: 0:09:46 lr: 0.000019 grad: 0.1889 (0.2286) loss: 0.6627 (0.6671) time: 0.1572 data: 0.0667 max mem: 9377 +Train: [75] [2900/6250] eta: 0:09:31 lr: 0.000019 grad: 0.1872 (0.2278) loss: 0.6668 (0.6669) time: 0.1834 data: 0.0934 max mem: 9377 +Train: [75] [3000/6250] eta: 0:09:15 lr: 0.000019 grad: 0.1982 (0.2271) loss: 0.6577 (0.6668) time: 0.1797 data: 0.0828 max mem: 9377 +Train: [75] [3100/6250] eta: 0:08:59 lr: 0.000019 grad: 0.2067 (0.2264) loss: 0.6794 (0.6669) time: 0.1814 data: 0.0907 max mem: 9377 +Train: [75] [3200/6250] eta: 0:08:43 lr: 0.000019 grad: 0.1984 (0.2259) loss: 0.6699 (0.6671) time: 0.1768 data: 0.0863 max mem: 9377 +Train: [75] [3300/6250] eta: 0:08:26 lr: 0.000019 grad: 0.1975 (0.2254) loss: 0.6614 (0.6669) time: 0.1646 data: 0.0734 max mem: 9377 +Train: [75] [3400/6250] eta: 0:08:08 lr: 0.000019 grad: 0.1934 (0.2249) loss: 0.6697 (0.6668) time: 0.1619 data: 0.0748 max mem: 9377 +Train: [75] [3500/6250] eta: 0:07:51 lr: 0.000019 grad: 0.1916 (0.2244) loss: 0.6601 (0.6668) time: 0.1675 data: 0.0711 max mem: 9377 +Train: [75] [3600/6250] eta: 0:07:33 lr: 0.000019 grad: 0.1950 (0.2239) loss: 0.6557 (0.6668) time: 0.1708 data: 0.0798 max mem: 9377 +Train: [75] [3700/6250] eta: 0:07:15 lr: 0.000019 grad: 0.2057 (0.2240) loss: 0.6640 (0.6667) time: 0.1643 data: 0.0646 max mem: 9377 +Train: [75] [3800/6250] eta: 0:06:58 lr: 0.000019 grad: 0.1926 (0.2234) loss: 0.6625 (0.6665) time: 0.1883 data: 0.1004 max mem: 9377 +Train: [75] [3900/6250] eta: 0:06:40 lr: 0.000019 grad: 0.2029 (0.2232) loss: 0.6603 (0.6664) time: 0.1834 data: 0.0984 max mem: 9377 +Train: [75] [4000/6250] eta: 0:06:23 lr: 0.000019 grad: 0.1951 (0.2229) loss: 0.6716 (0.6664) time: 0.1780 data: 0.0935 max mem: 9377 +Train: [75] [4100/6250] eta: 0:06:05 lr: 0.000019 grad: 0.1935 (0.2226) loss: 0.6595 (0.6663) time: 0.1459 data: 0.0600 max mem: 9377 +Train: [75] [4200/6250] eta: 0:05:49 lr: 0.000019 grad: 0.1952 (0.2223) loss: 0.6640 (0.6663) time: 0.1732 data: 0.0819 max mem: 9377 +Train: [75] [4300/6250] eta: 0:05:32 lr: 0.000019 grad: 0.1944 (0.2220) loss: 0.6559 (0.6661) time: 0.1480 data: 0.0597 max mem: 9377 +Train: [75] [4400/6250] eta: 0:05:15 lr: 0.000019 grad: 0.2055 (0.2219) loss: 0.6494 (0.6661) time: 0.1757 data: 0.0895 max mem: 9377 +Train: [75] [4500/6250] eta: 0:04:57 lr: 0.000019 grad: 0.1990 (0.2220) loss: 0.6593 (0.6660) time: 0.1547 data: 0.0725 max mem: 9377 +Train: [75] [4600/6250] eta: 0:04:41 lr: 0.000019 grad: 0.2103 (0.2221) loss: 0.6484 (0.6658) time: 0.1961 data: 0.1146 max mem: 9377 +Train: [75] [4700/6250] eta: 0:04:24 lr: 0.000019 grad: 0.1969 (0.2221) loss: 0.6533 (0.6658) time: 0.1926 data: 0.1059 max mem: 9377 +Train: [75] [4800/6250] eta: 0:04:07 lr: 0.000019 grad: 0.1943 (0.2220) loss: 0.6636 (0.6658) time: 0.1453 data: 0.0461 max mem: 9377 +Train: [75] [4900/6250] eta: 0:03:49 lr: 0.000019 grad: 0.1950 (0.2220) loss: 0.6754 (0.6658) time: 0.1645 data: 0.0817 max mem: 9377 +Train: [75] [5000/6250] eta: 0:03:32 lr: 0.000019 grad: 0.1893 (0.2217) loss: 0.6766 (0.6657) time: 0.1889 data: 0.0915 max mem: 9377 +Train: [75] [5100/6250] eta: 0:03:15 lr: 0.000019 grad: 0.1958 (0.2216) loss: 0.6699 (0.6657) time: 0.1774 data: 0.0806 max mem: 9377 +Train: [75] [5200/6250] eta: 0:02:58 lr: 0.000019 grad: 0.1953 (0.2214) loss: 0.6821 (0.6657) time: 0.1495 data: 0.0613 max mem: 9377 +Train: [75] [5300/6250] eta: 0:02:41 lr: 0.000019 grad: 0.1932 (0.2212) loss: 0.6641 (0.6657) time: 0.1366 data: 0.0525 max mem: 9377 +Train: [75] [5400/6250] eta: 0:02:24 lr: 0.000019 grad: 0.2001 (0.2214) loss: 0.6459 (0.6657) time: 0.1508 data: 0.0673 max mem: 9377 +Train: [75] [5500/6250] eta: 0:02:06 lr: 0.000019 grad: 0.1940 (0.2212) loss: 0.6647 (0.6657) time: 0.1613 data: 0.0673 max mem: 9377 +Train: [75] [5600/6250] eta: 0:01:49 lr: 0.000019 grad: 0.1970 (0.2210) loss: 0.6781 (0.6657) time: 0.1458 data: 0.0608 max mem: 9377 +Train: [75] [5700/6250] eta: 0:01:32 lr: 0.000019 grad: 0.1934 (0.2208) loss: 0.6737 (0.6659) time: 0.1729 data: 0.0928 max mem: 9377 +Train: [75] [5800/6250] eta: 0:01:15 lr: 0.000019 grad: 0.1908 (0.2205) loss: 0.6758 (0.6660) time: 0.1768 data: 0.1002 max mem: 9377 +Train: [75] [5900/6250] eta: 0:00:58 lr: 0.000019 grad: 0.1946 (0.2203) loss: 0.6631 (0.6660) time: 0.1767 data: 0.0943 max mem: 9377 +Train: [75] [6000/6250] eta: 0:00:42 lr: 0.000019 grad: 0.1968 (0.2202) loss: 0.6637 (0.6660) time: 0.1705 data: 0.0803 max mem: 9377 +Train: [75] [6100/6250] eta: 0:00:25 lr: 0.000019 grad: 0.2140 (0.2202) loss: 0.6558 (0.6660) time: 0.1551 data: 0.0692 max mem: 9377 +Train: [75] [6200/6250] eta: 0:00:08 lr: 0.000019 grad: 0.1987 (0.2205) loss: 0.6628 (0.6660) time: 0.1588 data: 0.0745 max mem: 9377 +Train: [75] [6249/6250] eta: 0:00:00 lr: 0.000019 grad: 0.1993 (0.2205) loss: 0.6627 (0.6660) time: 0.1839 data: 0.1042 max mem: 9377 +Train: [75] Total time: 0:17:35 (0.1688 s / it) +Averaged stats: lr: 0.000019 grad: 0.1993 (0.2205) loss: 0.6627 (0.6660) +Eval (hcp-train-subset): [75] [ 0/62] eta: 0:05:52 loss: 0.8930 (0.8930) time: 5.6873 data: 5.6570 max mem: 9377 +Eval (hcp-train-subset): [75] [61/62] eta: 0:00:00 loss: 0.9068 (0.9073) time: 0.1311 data: 0.1044 max mem: 9377 +Eval (hcp-train-subset): [75] Total time: 0:00:14 (0.2393 s / it) +Averaged stats (hcp-train-subset): loss: 0.9068 (0.9073) +Eval (hcp-val): [75] [ 0/62] eta: 0:06:36 loss: 0.9085 (0.9085) time: 6.4029 data: 6.3721 max mem: 9377 +Eval (hcp-val): [75] [61/62] eta: 0:00:00 loss: 0.9075 (0.9063) time: 0.1213 data: 0.0965 max mem: 9377 +Eval (hcp-val): [75] Total time: 0:00:14 (0.2321 s / it) +Averaged stats (hcp-val): loss: 0.9075 (0.9063) +Eval (nsd-val): [75] [ 0/62] eta: 0:05:52 loss: 0.9011 (0.9011) time: 5.6899 data: 5.6596 max mem: 9377 +Eval (nsd-val): [75] [61/62] eta: 0:00:00 loss: 0.9063 (0.9098) time: 0.1506 data: 0.1254 max mem: 9377 +Eval (nsd-val): [75] Total time: 0:00:14 (0.2373 s / it) +Averaged stats (nsd-val): loss: 0.9063 (0.9098) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [76] [ 0/6250] eta: 9:27:22 lr: 0.000019 grad: 0.4752 (0.4752) loss: 0.7892 (0.7892) time: 5.4468 data: 5.2319 max mem: 9377 +Train: [76] [ 100/6250] eta: 0:22:06 lr: 0.000019 grad: 0.3540 (0.3474) loss: 0.6808 (0.6959) time: 0.1633 data: 0.0624 max mem: 9377 +Train: [76] [ 200/6250] eta: 0:19:28 lr: 0.000019 grad: 0.2884 (0.3478) loss: 0.6473 (0.6761) time: 0.1711 data: 0.0621 max mem: 9377 +Train: [76] [ 300/6250] eta: 0:18:29 lr: 0.000019 grad: 0.2461 (0.3261) loss: 0.6516 (0.6656) time: 0.1885 data: 0.0851 max mem: 9377 +Train: [76] [ 400/6250] eta: 0:17:39 lr: 0.000019 grad: 0.2351 (0.3075) loss: 0.6549 (0.6629) time: 0.1661 data: 0.0630 max mem: 9377 +Train: [76] [ 500/6250] eta: 0:16:57 lr: 0.000019 grad: 0.2206 (0.2956) loss: 0.6523 (0.6617) time: 0.1891 data: 0.0858 max mem: 9377 +Train: [76] [ 600/6250] eta: 0:16:15 lr: 0.000019 grad: 0.2123 (0.2867) loss: 0.6513 (0.6594) time: 0.1415 data: 0.0367 max mem: 9377 +Train: [76] [ 700/6250] eta: 0:15:51 lr: 0.000019 grad: 0.2094 (0.2783) loss: 0.6426 (0.6576) time: 0.1524 data: 0.0607 max mem: 9377 +Train: [76] [ 800/6250] eta: 0:15:26 lr: 0.000018 grad: 0.2046 (0.2712) loss: 0.6388 (0.6567) time: 0.1504 data: 0.0553 max mem: 9377 +Train: [76] [ 900/6250] eta: 0:15:10 lr: 0.000018 grad: 0.1989 (0.2647) loss: 0.6645 (0.6572) time: 0.1730 data: 0.0800 max mem: 9377 +Train: [76] [1000/6250] eta: 0:14:51 lr: 0.000018 grad: 0.1966 (0.2603) loss: 0.6797 (0.6581) time: 0.1706 data: 0.0797 max mem: 9377 +Train: [76] [1100/6250] eta: 0:14:27 lr: 0.000018 grad: 0.1916 (0.2556) loss: 0.6701 (0.6589) time: 0.1521 data: 0.0636 max mem: 9377 +Train: [76] [1200/6250] eta: 0:14:06 lr: 0.000018 grad: 0.1985 (0.2515) loss: 0.6700 (0.6596) time: 0.1657 data: 0.0671 max mem: 9377 +Train: [76] [1300/6250] eta: 0:13:45 lr: 0.000018 grad: 0.1987 (0.2496) loss: 0.6567 (0.6603) time: 0.1455 data: 0.0539 max mem: 9377 +Train: [76] [1400/6250] eta: 0:13:28 lr: 0.000018 grad: 0.1948 (0.2466) loss: 0.6753 (0.6611) time: 0.1758 data: 0.0840 max mem: 9377 +Train: [76] [1500/6250] eta: 0:13:05 lr: 0.000018 grad: 0.2039 (0.2445) loss: 0.6656 (0.6614) time: 0.1272 data: 0.0341 max mem: 9377 +Train: [76] [1600/6250] eta: 0:12:44 lr: 0.000018 grad: 0.1995 (0.2430) loss: 0.6546 (0.6619) time: 0.1582 data: 0.0676 max mem: 9377 +Train: [76] [1700/6250] eta: 0:12:24 lr: 0.000018 grad: 0.2064 (0.2413) loss: 0.6703 (0.6624) time: 0.1463 data: 0.0589 max mem: 9377 +Train: [76] [1800/6250] eta: 0:12:06 lr: 0.000018 grad: 0.1867 (0.2396) loss: 0.6703 (0.6627) time: 0.1557 data: 0.0620 max mem: 9377 +Train: [76] [1900/6250] eta: 0:11:48 lr: 0.000018 grad: 0.2008 (0.2387) loss: 0.6628 (0.6629) time: 0.1536 data: 0.0650 max mem: 9377 +Train: [76] [2000/6250] eta: 0:11:31 lr: 0.000018 grad: 0.2069 (0.2379) loss: 0.6659 (0.6629) time: 0.1550 data: 0.0625 max mem: 9377 +Train: [76] [2100/6250] eta: 0:11:14 lr: 0.000018 grad: 0.2021 (0.2371) loss: 0.6635 (0.6630) time: 0.1685 data: 0.0777 max mem: 9377 +Train: [76] [2200/6250] eta: 0:10:56 lr: 0.000018 grad: 0.1967 (0.2361) loss: 0.6634 (0.6631) time: 0.1460 data: 0.0624 max mem: 9377 +Train: [76] [2300/6250] eta: 0:10:39 lr: 0.000018 grad: 0.1957 (0.2354) loss: 0.6677 (0.6630) time: 0.1403 data: 0.0463 max mem: 9377 +Train: [76] [2400/6250] eta: 0:10:23 lr: 0.000018 grad: 0.1950 (0.2353) loss: 0.6647 (0.6632) time: 0.1567 data: 0.0756 max mem: 9377 +Train: [76] [2500/6250] eta: 0:10:09 lr: 0.000018 grad: 0.1951 (0.2343) loss: 0.6610 (0.6635) time: 0.1653 data: 0.0739 max mem: 9377 +Train: [76] [2600/6250] eta: 0:09:53 lr: 0.000018 grad: 0.1981 (0.2340) loss: 0.6594 (0.6637) time: 0.1773 data: 0.0836 max mem: 9377 +Train: [76] [2700/6250] eta: 0:09:38 lr: 0.000018 grad: 0.1994 (0.2338) loss: 0.6580 (0.6638) time: 0.1786 data: 0.0869 max mem: 9377 +Train: [76] [2800/6250] eta: 0:09:21 lr: 0.000018 grad: 0.1927 (0.2331) loss: 0.6658 (0.6638) time: 0.1776 data: 0.0889 max mem: 9377 +Train: [76] [2900/6250] eta: 0:09:05 lr: 0.000018 grad: 0.1990 (0.2328) loss: 0.6669 (0.6640) time: 0.1505 data: 0.0602 max mem: 9377 +Train: [76] [3000/6250] eta: 0:08:49 lr: 0.000018 grad: 0.1965 (0.2322) loss: 0.6707 (0.6640) time: 0.1636 data: 0.0690 max mem: 9377 +Train: [76] [3100/6250] eta: 0:08:33 lr: 0.000018 grad: 0.1928 (0.2322) loss: 0.6730 (0.6641) time: 0.1654 data: 0.0720 max mem: 9377 +Train: [76] [3200/6250] eta: 0:08:17 lr: 0.000018 grad: 0.1937 (0.2316) loss: 0.6551 (0.6640) time: 0.1739 data: 0.0882 max mem: 9377 +Train: [76] [3300/6250] eta: 0:08:00 lr: 0.000018 grad: 0.1997 (0.2312) loss: 0.6647 (0.6639) time: 0.1497 data: 0.0553 max mem: 9377 +Train: [76] [3400/6250] eta: 0:07:43 lr: 0.000018 grad: 0.1972 (0.2311) loss: 0.6667 (0.6638) time: 0.1565 data: 0.0640 max mem: 9377 +Train: [76] [3500/6250] eta: 0:07:26 lr: 0.000018 grad: 0.1899 (0.2312) loss: 0.6598 (0.6638) time: 0.1566 data: 0.0728 max mem: 9377 +Train: [76] [3600/6250] eta: 0:07:09 lr: 0.000018 grad: 0.1952 (0.2306) loss: 0.6534 (0.6638) time: 0.1448 data: 0.0515 max mem: 9377 +Train: [76] [3700/6250] eta: 0:06:52 lr: 0.000018 grad: 0.1924 (0.2300) loss: 0.6616 (0.6638) time: 0.1589 data: 0.0725 max mem: 9377 +Train: [76] [3800/6250] eta: 0:06:35 lr: 0.000018 grad: 0.1956 (0.2297) loss: 0.6586 (0.6636) time: 0.1355 data: 0.0478 max mem: 9377 +Train: [76] [3900/6250] eta: 0:06:19 lr: 0.000018 grad: 0.2007 (0.2293) loss: 0.6652 (0.6635) time: 0.1516 data: 0.0635 max mem: 9377 +Train: [76] [4000/6250] eta: 0:06:03 lr: 0.000018 grad: 0.1914 (0.2289) loss: 0.6731 (0.6634) time: 0.1525 data: 0.0686 max mem: 9377 +Train: [76] [4100/6250] eta: 0:05:46 lr: 0.000018 grad: 0.1939 (0.2284) loss: 0.6592 (0.6635) time: 0.1480 data: 0.0537 max mem: 9377 +Train: [76] [4200/6250] eta: 0:05:29 lr: 0.000018 grad: 0.1956 (0.2281) loss: 0.6477 (0.6634) time: 0.1379 data: 0.0426 max mem: 9377 +Train: [76] [4300/6250] eta: 0:05:13 lr: 0.000018 grad: 0.1939 (0.2283) loss: 0.6550 (0.6633) time: 0.1770 data: 0.0974 max mem: 9377 +Train: [76] [4400/6250] eta: 0:04:57 lr: 0.000018 grad: 0.1989 (0.2286) loss: 0.6510 (0.6632) time: 0.1515 data: 0.0709 max mem: 9377 +Train: [76] [4500/6250] eta: 0:04:41 lr: 0.000018 grad: 0.1915 (0.2285) loss: 0.6721 (0.6632) time: 0.1589 data: 0.0724 max mem: 9377 +Train: [76] [4600/6250] eta: 0:04:25 lr: 0.000018 grad: 0.2007 (0.2283) loss: 0.6529 (0.6632) time: 0.1347 data: 0.0411 max mem: 9377 +Train: [76] [4700/6250] eta: 0:04:09 lr: 0.000018 grad: 0.1929 (0.2282) loss: 0.6589 (0.6632) time: 0.1934 data: 0.1071 max mem: 9377 +Train: [76] [4800/6250] eta: 0:03:53 lr: 0.000018 grad: 0.2030 (0.2279) loss: 0.6676 (0.6632) time: 0.2001 data: 0.1084 max mem: 9377 +Train: [76] [4900/6250] eta: 0:03:37 lr: 0.000018 grad: 0.1935 (0.2276) loss: 0.6588 (0.6631) time: 0.1695 data: 0.0831 max mem: 9377 +Train: [76] [5000/6250] eta: 0:03:21 lr: 0.000018 grad: 0.1972 (0.2274) loss: 0.6747 (0.6632) time: 0.1317 data: 0.0383 max mem: 9377 +Train: [76] [5100/6250] eta: 0:03:05 lr: 0.000017 grad: 0.1980 (0.2272) loss: 0.6674 (0.6632) time: 0.1474 data: 0.0510 max mem: 9377 +Train: [76] [5200/6250] eta: 0:02:48 lr: 0.000017 grad: 0.2115 (0.2270) loss: 0.6669 (0.6632) time: 0.1349 data: 0.0432 max mem: 9377 +Train: [76] [5300/6250] eta: 0:02:32 lr: 0.000017 grad: 0.1956 (0.2269) loss: 0.6607 (0.6632) time: 0.1577 data: 0.0489 max mem: 9377 +Train: [76] [5400/6250] eta: 0:02:16 lr: 0.000017 grad: 0.1915 (0.2267) loss: 0.6625 (0.6632) time: 0.1247 data: 0.0339 max mem: 9377 +Train: [76] [5500/6250] eta: 0:02:00 lr: 0.000017 grad: 0.1906 (0.2265) loss: 0.6614 (0.6632) time: 0.1400 data: 0.0484 max mem: 9377 +Train: [76] [5600/6250] eta: 0:01:44 lr: 0.000017 grad: 0.2070 (0.2265) loss: 0.6642 (0.6634) time: 0.1467 data: 0.0631 max mem: 9377 +Train: [76] [5700/6250] eta: 0:01:28 lr: 0.000017 grad: 0.1938 (0.2264) loss: 0.6739 (0.6635) time: 0.1763 data: 0.0914 max mem: 9377 +Train: [76] [5800/6250] eta: 0:01:12 lr: 0.000017 grad: 0.1924 (0.2262) loss: 0.6534 (0.6636) time: 0.1744 data: 0.0878 max mem: 9377 +Train: [76] [5900/6250] eta: 0:00:56 lr: 0.000017 grad: 0.1911 (0.2259) loss: 0.6600 (0.6637) time: 0.1511 data: 0.0635 max mem: 9377 +Train: [76] [6000/6250] eta: 0:00:40 lr: 0.000017 grad: 0.2057 (0.2260) loss: 0.6604 (0.6638) time: 0.1628 data: 0.0800 max mem: 9377 +Train: [76] [6100/6250] eta: 0:00:23 lr: 0.000017 grad: 0.1952 (0.2259) loss: 0.6571 (0.6638) time: 0.1463 data: 0.0536 max mem: 9377 +Train: [76] [6200/6250] eta: 0:00:07 lr: 0.000017 grad: 0.1951 (0.2259) loss: 0.6692 (0.6639) time: 0.1532 data: 0.0679 max mem: 9377 +Train: [76] [6249/6250] eta: 0:00:00 lr: 0.000017 grad: 0.1969 (0.2259) loss: 0.6526 (0.6638) time: 0.1286 data: 0.0346 max mem: 9377 +Train: [76] Total time: 0:16:46 (0.1610 s / it) +Averaged stats: lr: 0.000017 grad: 0.1969 (0.2259) loss: 0.6526 (0.6638) +Eval (hcp-train-subset): [76] [ 0/62] eta: 0:06:32 loss: 0.8986 (0.8986) time: 6.3384 data: 6.3083 max mem: 9377 +Eval (hcp-train-subset): [76] [61/62] eta: 0:00:00 loss: 0.9085 (0.9083) time: 0.1577 data: 0.1322 max mem: 9377 +Eval (hcp-train-subset): [76] Total time: 0:00:16 (0.2613 s / it) +Averaged stats (hcp-train-subset): loss: 0.9085 (0.9083) +Eval (hcp-val): [76] [ 0/62] eta: 0:05:24 loss: 0.9074 (0.9074) time: 5.2392 data: 5.1745 max mem: 9377 +Eval (hcp-val): [76] [61/62] eta: 0:00:00 loss: 0.9050 (0.9085) time: 0.1747 data: 0.1490 max mem: 9377 +Eval (hcp-val): [76] Total time: 0:00:16 (0.2705 s / it) +Averaged stats (hcp-val): loss: 0.9050 (0.9085) +Eval (nsd-val): [76] [ 0/62] eta: 0:04:28 loss: 0.9111 (0.9111) time: 4.3336 data: 4.2423 max mem: 9377 +Eval (nsd-val): [76] [61/62] eta: 0:00:00 loss: 0.9063 (0.9098) time: 0.1497 data: 0.1241 max mem: 9377 +Eval (nsd-val): [76] Total time: 0:00:16 (0.2618 s / it) +Averaged stats (nsd-val): loss: 0.9063 (0.9098) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [77] [ 0/6250] eta: 11:57:50 lr: 0.000017 grad: 0.2530 (0.2530) loss: 0.7453 (0.7453) time: 6.8913 data: 6.7565 max mem: 9377 +Train: [77] [ 100/6250] eta: 0:24:37 lr: 0.000017 grad: 0.3097 (0.3314) loss: 0.6864 (0.6868) time: 0.1801 data: 0.0797 max mem: 9377 +Train: [77] [ 200/6250] eta: 0:20:56 lr: 0.000017 grad: 0.2635 (0.3126) loss: 0.6721 (0.6803) time: 0.1959 data: 0.0809 max mem: 9377 +Train: [77] [ 300/6250] eta: 0:18:53 lr: 0.000017 grad: 0.2354 (0.3030) loss: 0.6745 (0.6791) time: 0.1639 data: 0.0599 max mem: 9377 +Train: [77] [ 400/6250] eta: 0:17:35 lr: 0.000017 grad: 0.2220 (0.2927) loss: 0.6545 (0.6769) time: 0.1530 data: 0.0563 max mem: 9377 +Train: [77] [ 500/6250] eta: 0:16:52 lr: 0.000017 grad: 0.2592 (0.2874) loss: 0.6622 (0.6760) time: 0.1183 data: 0.0045 max mem: 9377 +Train: [77] [ 600/6250] eta: 0:16:18 lr: 0.000017 grad: 0.2140 (0.2853) loss: 0.6733 (0.6754) time: 0.1608 data: 0.0638 max mem: 9377 +Train: [77] [ 700/6250] eta: 0:15:51 lr: 0.000017 grad: 0.1982 (0.2764) loss: 0.6677 (0.6749) time: 0.1868 data: 0.0950 max mem: 9377 +Train: [77] [ 800/6250] eta: 0:15:23 lr: 0.000017 grad: 0.1953 (0.2699) loss: 0.6671 (0.6748) time: 0.1771 data: 0.0774 max mem: 9377 +Train: [77] [ 900/6250] eta: 0:15:25 lr: 0.000017 grad: 0.2023 (0.2646) loss: 0.6637 (0.6746) time: 0.2151 data: 0.1200 max mem: 9377 +Train: [77] [1000/6250] eta: 0:15:09 lr: 0.000017 grad: 0.2008 (0.2592) loss: 0.6619 (0.6743) time: 0.1995 data: 0.1003 max mem: 9377 +Train: [77] [1100/6250] eta: 0:14:49 lr: 0.000017 grad: 0.1964 (0.2560) loss: 0.6780 (0.6738) time: 0.1563 data: 0.0551 max mem: 9377 +Train: [77] [1200/6250] eta: 0:14:34 lr: 0.000017 grad: 0.1907 (0.2526) loss: 0.6763 (0.6733) time: 0.1993 data: 0.1020 max mem: 9377 +Train: [77] [1300/6250] eta: 0:14:23 lr: 0.000017 grad: 0.1995 (0.2495) loss: 0.6662 (0.6729) time: 0.2052 data: 0.1091 max mem: 9377 +Train: [77] [1400/6250] eta: 0:14:06 lr: 0.000017 grad: 0.1998 (0.2475) loss: 0.6594 (0.6725) time: 0.1652 data: 0.0695 max mem: 9377 +Train: [77] [1500/6250] eta: 0:13:50 lr: 0.000017 grad: 0.1957 (0.2473) loss: 0.6545 (0.6719) time: 0.1833 data: 0.0936 max mem: 9377 +Train: [77] [1600/6250] eta: 0:13:30 lr: 0.000017 grad: 0.1854 (0.2464) loss: 0.6812 (0.6715) time: 0.1561 data: 0.0641 max mem: 9377 +Train: [77] [1700/6250] eta: 0:13:09 lr: 0.000017 grad: 0.1973 (0.2444) loss: 0.6572 (0.6710) time: 0.1551 data: 0.0665 max mem: 9377 +Train: [77] [1800/6250] eta: 0:12:49 lr: 0.000017 grad: 0.1987 (0.2434) loss: 0.6614 (0.6705) time: 0.1457 data: 0.0631 max mem: 9377 +Train: [77] [1900/6250] eta: 0:12:30 lr: 0.000017 grad: 0.1967 (0.2426) loss: 0.6625 (0.6701) time: 0.1625 data: 0.0660 max mem: 9377 +Train: [77] [2000/6250] eta: 0:12:11 lr: 0.000017 grad: 0.2081 (0.2411) loss: 0.6449 (0.6693) time: 0.1718 data: 0.0811 max mem: 9377 +Train: [77] [2100/6250] eta: 0:11:52 lr: 0.000017 grad: 0.1898 (0.2396) loss: 0.6634 (0.6688) time: 0.1690 data: 0.0818 max mem: 9377 +Train: [77] [2200/6250] eta: 0:11:31 lr: 0.000017 grad: 0.1955 (0.2393) loss: 0.6600 (0.6685) time: 0.1652 data: 0.0740 max mem: 9377 +Train: [77] [2300/6250] eta: 0:11:13 lr: 0.000017 grad: 0.1941 (0.2390) loss: 0.6616 (0.6680) time: 0.1485 data: 0.0559 max mem: 9377 +Train: [77] [2400/6250] eta: 0:10:53 lr: 0.000017 grad: 0.1846 (0.2382) loss: 0.6761 (0.6679) time: 0.1395 data: 0.0468 max mem: 9377 +Train: [77] [2500/6250] eta: 0:10:36 lr: 0.000017 grad: 0.1966 (0.2370) loss: 0.6683 (0.6678) time: 0.2141 data: 0.1372 max mem: 9377 +Train: [77] [2600/6250] eta: 0:10:18 lr: 0.000017 grad: 0.1979 (0.2366) loss: 0.6577 (0.6678) time: 0.1486 data: 0.0625 max mem: 9377 +Train: [77] [2700/6250] eta: 0:10:01 lr: 0.000017 grad: 0.1943 (0.2357) loss: 0.6538 (0.6679) time: 0.1604 data: 0.0791 max mem: 9377 +Train: [77] [2800/6250] eta: 0:09:44 lr: 0.000017 grad: 0.1901 (0.2348) loss: 0.6789 (0.6681) time: 0.1719 data: 0.0907 max mem: 9377 +Train: [77] [2900/6250] eta: 0:09:27 lr: 0.000017 grad: 0.1943 (0.2340) loss: 0.6931 (0.6684) time: 0.1838 data: 0.0990 max mem: 9377 +Train: [77] [3000/6250] eta: 0:09:10 lr: 0.000017 grad: 0.2001 (0.2333) loss: 0.6777 (0.6685) time: 0.1781 data: 0.0889 max mem: 9377 +Train: [77] [3100/6250] eta: 0:08:52 lr: 0.000017 grad: 0.1940 (0.2327) loss: 0.6692 (0.6685) time: 0.1737 data: 0.0857 max mem: 9377 +Train: [77] [3200/6250] eta: 0:08:35 lr: 0.000017 grad: 0.1990 (0.2329) loss: 0.6716 (0.6686) time: 0.1813 data: 0.0870 max mem: 9377 +Train: [77] [3300/6250] eta: 0:08:18 lr: 0.000016 grad: 0.1975 (0.2321) loss: 0.6765 (0.6686) time: 0.1565 data: 0.0510 max mem: 9377 +Train: [77] [3400/6250] eta: 0:08:01 lr: 0.000016 grad: 0.1971 (0.2315) loss: 0.6570 (0.6686) time: 0.1814 data: 0.1003 max mem: 9377 +Train: [77] [3500/6250] eta: 0:07:43 lr: 0.000016 grad: 0.1985 (0.2315) loss: 0.6580 (0.6685) time: 0.1545 data: 0.0598 max mem: 9377 +Train: [77] [3600/6250] eta: 0:07:25 lr: 0.000016 grad: 0.1990 (0.2318) loss: 0.6647 (0.6685) time: 0.1482 data: 0.0566 max mem: 9377 +Train: [77] [3700/6250] eta: 0:07:08 lr: 0.000016 grad: 0.1931 (0.2315) loss: 0.6711 (0.6686) time: 0.1426 data: 0.0576 max mem: 9377 +Train: [77] [3800/6250] eta: 0:06:50 lr: 0.000016 grad: 0.1981 (0.2312) loss: 0.6677 (0.6685) time: 0.1618 data: 0.0747 max mem: 9377 +Train: [77] [3900/6250] eta: 0:06:33 lr: 0.000016 grad: 0.1969 (0.2309) loss: 0.6694 (0.6685) time: 0.1480 data: 0.0513 max mem: 9377 +Train: [77] [4000/6250] eta: 0:06:15 lr: 0.000016 grad: 0.1967 (0.2307) loss: 0.6614 (0.6684) time: 0.1680 data: 0.0855 max mem: 9377 +Train: [77] [4100/6250] eta: 0:05:58 lr: 0.000016 grad: 0.1969 (0.2306) loss: 0.6683 (0.6684) time: 0.1669 data: 0.0812 max mem: 9377 +Train: [77] [4200/6250] eta: 0:05:41 lr: 0.000016 grad: 0.2080 (0.2308) loss: 0.6774 (0.6684) time: 0.1716 data: 0.0830 max mem: 9377 +Train: [77] [4300/6250] eta: 0:05:24 lr: 0.000016 grad: 0.2232 (0.2306) loss: 0.6688 (0.6684) time: 0.1809 data: 0.0943 max mem: 9377 +Train: [77] [4400/6250] eta: 0:05:08 lr: 0.000016 grad: 0.2065 (0.2308) loss: 0.6597 (0.6685) time: 0.1785 data: 0.0929 max mem: 9377 +Train: [77] [4500/6250] eta: 0:04:51 lr: 0.000016 grad: 0.2053 (0.2309) loss: 0.6607 (0.6686) time: 0.1753 data: 0.0935 max mem: 9377 +Train: [77] [4600/6250] eta: 0:04:34 lr: 0.000016 grad: 0.1968 (0.2308) loss: 0.6644 (0.6686) time: 0.1275 data: 0.0418 max mem: 9377 +Train: [77] [4700/6250] eta: 0:04:17 lr: 0.000016 grad: 0.2009 (0.2305) loss: 0.6610 (0.6685) time: 0.1672 data: 0.0772 max mem: 9377 +Train: [77] [4800/6250] eta: 0:04:00 lr: 0.000016 grad: 0.2003 (0.2302) loss: 0.6676 (0.6684) time: 0.1739 data: 0.0911 max mem: 9377 +Train: [77] [4900/6250] eta: 0:03:44 lr: 0.000016 grad: 0.1976 (0.2301) loss: 0.6733 (0.6684) time: 0.1538 data: 0.0618 max mem: 9377 +Train: [77] [5000/6250] eta: 0:03:27 lr: 0.000016 grad: 0.2000 (0.2300) loss: 0.6554 (0.6684) time: 0.1682 data: 0.0818 max mem: 9377 +Train: [77] [5100/6250] eta: 0:03:10 lr: 0.000016 grad: 0.2049 (0.2303) loss: 0.6705 (0.6684) time: 0.1432 data: 0.0522 max mem: 9377 +Train: [77] [5200/6250] eta: 0:02:53 lr: 0.000016 grad: 0.2020 (0.2305) loss: 0.6827 (0.6685) time: 0.1548 data: 0.0563 max mem: 9377 +Train: [77] [5300/6250] eta: 0:02:36 lr: 0.000016 grad: 0.2039 (0.2308) loss: 0.6741 (0.6684) time: 0.1369 data: 0.0420 max mem: 9377 +Train: [77] [5400/6250] eta: 0:02:19 lr: 0.000016 grad: 0.2051 (0.2312) loss: 0.6656 (0.6685) time: 0.1324 data: 0.0396 max mem: 9377 +Train: [77] [5500/6250] eta: 0:02:03 lr: 0.000016 grad: 0.2010 (0.2314) loss: 0.6637 (0.6684) time: 0.1517 data: 0.0646 max mem: 9377 +Train: [77] [5600/6250] eta: 0:01:46 lr: 0.000016 grad: 0.1921 (0.2314) loss: 0.6729 (0.6685) time: 0.1585 data: 0.0724 max mem: 9377 +Train: [77] [5700/6250] eta: 0:01:30 lr: 0.000016 grad: 0.2027 (0.2314) loss: 0.6755 (0.6685) time: 0.1585 data: 0.0726 max mem: 9377 +Train: [77] [5800/6250] eta: 0:01:13 lr: 0.000016 grad: 0.2143 (0.2319) loss: 0.6624 (0.6684) time: 0.1855 data: 0.1016 max mem: 9377 +Train: [77] [5900/6250] eta: 0:00:57 lr: 0.000016 grad: 0.1985 (0.2323) loss: 0.6702 (0.6684) time: 0.1460 data: 0.0539 max mem: 9377 +Train: [77] [6000/6250] eta: 0:00:40 lr: 0.000016 grad: 0.2107 (0.2325) loss: 0.6628 (0.6683) time: 0.1740 data: 0.0826 max mem: 9377 +Train: [77] [6100/6250] eta: 0:00:24 lr: 0.000016 grad: 0.2052 (0.2328) loss: 0.6688 (0.6683) time: 0.1649 data: 0.0735 max mem: 9377 +Train: [77] [6200/6250] eta: 0:00:08 lr: 0.000016 grad: 0.2049 (0.2329) loss: 0.6617 (0.6682) time: 0.1511 data: 0.0657 max mem: 9377 +Train: [77] [6249/6250] eta: 0:00:00 lr: 0.000016 grad: 0.2064 (0.2331) loss: 0.6738 (0.6682) time: 0.1269 data: 0.0377 max mem: 9377 +Train: [77] Total time: 0:17:07 (0.1644 s / it) +Averaged stats: lr: 0.000016 grad: 0.2064 (0.2331) loss: 0.6738 (0.6682) +Eval (hcp-train-subset): [77] [ 0/62] eta: 0:06:07 loss: 0.8991 (0.8991) time: 5.9233 data: 5.8920 max mem: 9377 +Eval (hcp-train-subset): [77] [61/62] eta: 0:00:00 loss: 0.9073 (0.9079) time: 0.1540 data: 0.1268 max mem: 9377 +Eval (hcp-train-subset): [77] Total time: 0:00:15 (0.2463 s / it) +Averaged stats (hcp-train-subset): loss: 0.9073 (0.9079) +Eval (hcp-val): [77] [ 0/62] eta: 0:05:59 loss: 0.9121 (0.9121) time: 5.7941 data: 5.7631 max mem: 9377 +Eval (hcp-val): [77] [61/62] eta: 0:00:00 loss: 0.9081 (0.9091) time: 0.1425 data: 0.1157 max mem: 9377 +Eval (hcp-val): [77] Total time: 0:00:14 (0.2399 s / it) +Averaged stats (hcp-val): loss: 0.9081 (0.9091) +Eval (nsd-val): [77] [ 0/62] eta: 0:04:19 loss: 0.9078 (0.9078) time: 4.1892 data: 4.1146 max mem: 9377 +Eval (nsd-val): [77] [61/62] eta: 0:00:00 loss: 0.9106 (0.9138) time: 0.1375 data: 0.1114 max mem: 9377 +Eval (nsd-val): [77] Total time: 0:00:14 (0.2328 s / it) +Averaged stats (nsd-val): loss: 0.9106 (0.9138) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [78] [ 0/6250] eta: 8:38:55 lr: 0.000016 grad: 0.1819 (0.1819) loss: 0.7962 (0.7962) time: 4.9817 data: 4.7476 max mem: 9377 +Train: [78] [ 100/6250] eta: 0:22:53 lr: 0.000016 grad: 0.3189 (0.3386) loss: 0.6556 (0.6818) time: 0.1601 data: 0.0539 max mem: 9377 +Train: [78] [ 200/6250] eta: 0:19:46 lr: 0.000016 grad: 0.2785 (0.3296) loss: 0.6433 (0.6727) time: 0.1707 data: 0.0652 max mem: 9377 +Train: [78] [ 300/6250] eta: 0:19:07 lr: 0.000016 grad: 0.2435 (0.3158) loss: 0.6744 (0.6699) time: 0.1887 data: 0.0739 max mem: 9377 +Train: [78] [ 400/6250] eta: 0:18:26 lr: 0.000016 grad: 0.2195 (0.3053) loss: 0.6531 (0.6668) time: 0.1414 data: 0.0283 max mem: 9377 +Train: [78] [ 500/6250] eta: 0:17:41 lr: 0.000016 grad: 0.2250 (0.2994) loss: 0.6622 (0.6654) time: 0.1560 data: 0.0602 max mem: 9377 +Train: [78] [ 600/6250] eta: 0:17:03 lr: 0.000016 grad: 0.2200 (0.2930) loss: 0.6571 (0.6658) time: 0.1669 data: 0.0737 max mem: 9377 +Train: [78] [ 700/6250] eta: 0:16:35 lr: 0.000016 grad: 0.1978 (0.2869) loss: 0.6876 (0.6673) time: 0.1684 data: 0.0711 max mem: 9377 +Train: [78] [ 800/6250] eta: 0:16:06 lr: 0.000016 grad: 0.2022 (0.2853) loss: 0.6706 (0.6679) time: 0.1368 data: 0.0433 max mem: 9377 +Train: [78] [ 900/6250] eta: 0:15:52 lr: 0.000016 grad: 0.2246 (0.2821) loss: 0.6770 (0.6689) time: 0.1528 data: 0.0679 max mem: 9377 +Train: [78] [1000/6250] eta: 0:15:29 lr: 0.000016 grad: 0.2179 (0.2805) loss: 0.6698 (0.6696) time: 0.1696 data: 0.0790 max mem: 9377 +Train: [78] [1100/6250] eta: 0:15:03 lr: 0.000016 grad: 0.2107 (0.2799) loss: 0.6749 (0.6701) time: 0.1482 data: 0.0573 max mem: 9377 +Train: [78] [1200/6250] eta: 0:14:43 lr: 0.000016 grad: 0.2140 (0.2790) loss: 0.6779 (0.6703) time: 0.1657 data: 0.0678 max mem: 9377 +Train: [78] [1300/6250] eta: 0:14:23 lr: 0.000016 grad: 0.2129 (0.2774) loss: 0.6648 (0.6703) time: 0.1726 data: 0.0707 max mem: 9377 +Train: [78] [1400/6250] eta: 0:14:05 lr: 0.000016 grad: 0.1993 (0.2770) loss: 0.6620 (0.6703) time: 0.1668 data: 0.0762 max mem: 9377 +Train: [78] [1500/6250] eta: 0:13:45 lr: 0.000015 grad: 0.2248 (0.2761) loss: 0.6694 (0.6702) time: 0.1788 data: 0.0738 max mem: 9377 +Train: [78] [1600/6250] eta: 0:13:23 lr: 0.000015 grad: 0.2088 (0.2749) loss: 0.6695 (0.6700) time: 0.1652 data: 0.0729 max mem: 9377 +Train: [78] [1700/6250] eta: 0:13:01 lr: 0.000015 grad: 0.2018 (0.2721) loss: 0.6638 (0.6698) time: 0.1508 data: 0.0525 max mem: 9377 +Train: [78] [1800/6250] eta: 0:12:41 lr: 0.000015 grad: 0.1996 (0.2699) loss: 0.6647 (0.6695) time: 0.1653 data: 0.0715 max mem: 9377 +Train: [78] [1900/6250] eta: 0:12:20 lr: 0.000015 grad: 0.2021 (0.2682) loss: 0.6615 (0.6690) time: 0.1527 data: 0.0613 max mem: 9377 +Train: [78] [2000/6250] eta: 0:12:01 lr: 0.000015 grad: 0.2053 (0.2660) loss: 0.6566 (0.6686) time: 0.1550 data: 0.0602 max mem: 9377 +Train: [78] [2100/6250] eta: 0:11:40 lr: 0.000015 grad: 0.2175 (0.2652) loss: 0.6524 (0.6679) time: 0.1339 data: 0.0337 max mem: 9377 +Train: [78] [2200/6250] eta: 0:11:21 lr: 0.000015 grad: 0.2003 (0.2641) loss: 0.6508 (0.6674) time: 0.1390 data: 0.0545 max mem: 9377 +Train: [78] [2300/6250] eta: 0:11:01 lr: 0.000015 grad: 0.2032 (0.2632) loss: 0.6579 (0.6672) time: 0.1547 data: 0.0525 max mem: 9377 +Train: [78] [2400/6250] eta: 0:10:42 lr: 0.000015 grad: 0.2032 (0.2618) loss: 0.6729 (0.6670) time: 0.1485 data: 0.0565 max mem: 9377 +Train: [78] [2500/6250] eta: 0:10:24 lr: 0.000015 grad: 0.1961 (0.2618) loss: 0.6609 (0.6668) time: 0.2143 data: 0.1370 max mem: 9377 +Train: [78] [2600/6250] eta: 0:10:08 lr: 0.000015 grad: 0.2045 (0.2618) loss: 0.6688 (0.6669) time: 0.1549 data: 0.0712 max mem: 9377 +Train: [78] [2700/6250] eta: 0:09:51 lr: 0.000015 grad: 0.2053 (0.2609) loss: 0.6902 (0.6672) time: 0.1523 data: 0.0700 max mem: 9377 +Train: [78] [2800/6250] eta: 0:09:34 lr: 0.000015 grad: 0.2127 (0.2602) loss: 0.6663 (0.6674) time: 0.1551 data: 0.0662 max mem: 9377 +Train: [78] [2900/6250] eta: 0:09:17 lr: 0.000015 grad: 0.2026 (0.2592) loss: 0.6641 (0.6675) time: 0.1663 data: 0.0876 max mem: 9377 +Train: [78] [3000/6250] eta: 0:09:00 lr: 0.000015 grad: 0.2037 (0.2588) loss: 0.6682 (0.6677) time: 0.1635 data: 0.0736 max mem: 9377 +Train: [78] [3100/6250] eta: 0:08:43 lr: 0.000015 grad: 0.1960 (0.2576) loss: 0.6776 (0.6678) time: 0.1837 data: 0.0985 max mem: 9377 +Train: [78] [3200/6250] eta: 0:08:27 lr: 0.000015 grad: 0.1972 (0.2564) loss: 0.6768 (0.6680) time: 0.1635 data: 0.0714 max mem: 9377 +Train: [78] [3300/6250] eta: 0:08:10 lr: 0.000015 grad: 0.1971 (0.2554) loss: 0.6607 (0.6682) time: 0.1818 data: 0.0967 max mem: 9377 +Train: [78] [3400/6250] eta: 0:07:52 lr: 0.000015 grad: 0.1937 (0.2542) loss: 0.6616 (0.6681) time: 0.1487 data: 0.0558 max mem: 9377 +Train: [78] [3500/6250] eta: 0:07:34 lr: 0.000015 grad: 0.2086 (0.2532) loss: 0.6632 (0.6680) time: 0.1359 data: 0.0467 max mem: 9377 +Train: [78] [3600/6250] eta: 0:07:17 lr: 0.000015 grad: 0.1968 (0.2526) loss: 0.6649 (0.6677) time: 0.1441 data: 0.0558 max mem: 9377 +Train: [78] [3700/6250] eta: 0:06:59 lr: 0.000015 grad: 0.2010 (0.2527) loss: 0.6532 (0.6678) time: 0.1467 data: 0.0527 max mem: 9377 +Train: [78] [3800/6250] eta: 0:06:42 lr: 0.000015 grad: 0.2063 (0.2524) loss: 0.6655 (0.6677) time: 0.1527 data: 0.0635 max mem: 9377 +Train: [78] [3900/6250] eta: 0:06:25 lr: 0.000015 grad: 0.2328 (0.2518) loss: 0.6604 (0.6678) time: 0.1678 data: 0.0778 max mem: 9377 +Train: [78] [4000/6250] eta: 0:06:08 lr: 0.000015 grad: 0.1992 (0.2519) loss: 0.6542 (0.6678) time: 0.1588 data: 0.0681 max mem: 9377 +Train: [78] [4100/6250] eta: 0:05:51 lr: 0.000015 grad: 0.1983 (0.2519) loss: 0.6739 (0.6678) time: 0.1847 data: 0.0968 max mem: 9377 +Train: [78] [4200/6250] eta: 0:05:35 lr: 0.000015 grad: 0.2040 (0.2517) loss: 0.6728 (0.6678) time: 0.1842 data: 0.1028 max mem: 9377 +Train: [78] [4300/6250] eta: 0:05:19 lr: 0.000015 grad: 0.2020 (0.2514) loss: 0.6734 (0.6679) time: 0.2359 data: 0.1567 max mem: 9377 +Train: [78] [4400/6250] eta: 0:05:03 lr: 0.000015 grad: 0.2059 (0.2509) loss: 0.6561 (0.6679) time: 0.2159 data: 0.1420 max mem: 9377 +Train: [78] [4500/6250] eta: 0:04:46 lr: 0.000015 grad: 0.2026 (0.2501) loss: 0.6764 (0.6680) time: 0.1691 data: 0.0902 max mem: 9377 +Train: [78] [4600/6250] eta: 0:04:30 lr: 0.000015 grad: 0.2415 (0.2502) loss: 0.6713 (0.6680) time: 0.1551 data: 0.0719 max mem: 9377 +Train: [78] [4700/6250] eta: 0:04:13 lr: 0.000015 grad: 0.2027 (0.2507) loss: 0.6730 (0.6681) time: 0.1398 data: 0.0546 max mem: 9377 +Train: [78] [4800/6250] eta: 0:03:57 lr: 0.000015 grad: 0.1999 (0.2499) loss: 0.6630 (0.6681) time: 0.1769 data: 0.0880 max mem: 9377 +Train: [78] [4900/6250] eta: 0:03:40 lr: 0.000015 grad: 0.1997 (0.2497) loss: 0.6624 (0.6681) time: 0.1778 data: 0.0912 max mem: 9377 +Train: [78] [5000/6250] eta: 0:03:24 lr: 0.000015 grad: 0.1911 (0.2491) loss: 0.6610 (0.6681) time: 0.1325 data: 0.0313 max mem: 9377 +Train: [78] [5100/6250] eta: 0:03:07 lr: 0.000015 grad: 0.1968 (0.2485) loss: 0.6739 (0.6682) time: 0.1707 data: 0.0799 max mem: 9377 +Train: [78] [5200/6250] eta: 0:02:51 lr: 0.000015 grad: 0.1948 (0.2478) loss: 0.6648 (0.6681) time: 0.1208 data: 0.0261 max mem: 9377 +Train: [78] [5300/6250] eta: 0:02:34 lr: 0.000015 grad: 0.1975 (0.2473) loss: 0.6605 (0.6681) time: 0.1519 data: 0.0594 max mem: 9377 +Train: [78] [5400/6250] eta: 0:02:18 lr: 0.000015 grad: 0.2167 (0.2471) loss: 0.6560 (0.6680) time: 0.1521 data: 0.0609 max mem: 9377 +Train: [78] [5500/6250] eta: 0:02:01 lr: 0.000015 grad: 0.2004 (0.2468) loss: 0.6622 (0.6680) time: 0.1622 data: 0.0722 max mem: 9377 +Train: [78] [5600/6250] eta: 0:01:45 lr: 0.000015 grad: 0.1961 (0.2467) loss: 0.6647 (0.6680) time: 0.1563 data: 0.0667 max mem: 9377 +Train: [78] [5700/6250] eta: 0:01:29 lr: 0.000015 grad: 0.1997 (0.2466) loss: 0.6696 (0.6680) time: 0.1607 data: 0.0710 max mem: 9377 +Train: [78] [5800/6250] eta: 0:01:12 lr: 0.000015 grad: 0.2005 (0.2464) loss: 0.6748 (0.6681) time: 0.1556 data: 0.0716 max mem: 9377 +Train: [78] [5900/6250] eta: 0:00:56 lr: 0.000015 grad: 0.1959 (0.2458) loss: 0.6722 (0.6682) time: 0.1449 data: 0.0542 max mem: 9377 +Train: [78] [6000/6250] eta: 0:00:40 lr: 0.000015 grad: 0.1980 (0.2453) loss: 0.6708 (0.6683) time: 0.1466 data: 0.0551 max mem: 9377 +Train: [78] [6100/6250] eta: 0:00:24 lr: 0.000015 grad: 0.1987 (0.2449) loss: 0.6731 (0.6684) time: 0.1434 data: 0.0549 max mem: 9377 +Train: [78] [6200/6250] eta: 0:00:08 lr: 0.000014 grad: 0.2004 (0.2446) loss: 0.6716 (0.6684) time: 0.1667 data: 0.0783 max mem: 9377 +Train: [78] [6249/6250] eta: 0:00:00 lr: 0.000014 grad: 0.1943 (0.2444) loss: 0.6808 (0.6684) time: 0.1100 data: 0.0080 max mem: 9377 +Train: [78] Total time: 0:16:59 (0.1631 s / it) +Averaged stats: lr: 0.000014 grad: 0.1943 (0.2444) loss: 0.6808 (0.6684) +Eval (hcp-train-subset): [78] [ 0/62] eta: 0:05:28 loss: 0.8998 (0.8998) time: 5.3040 data: 5.2313 max mem: 9377 +Eval (hcp-train-subset): [78] [61/62] eta: 0:00:00 loss: 0.9072 (0.9094) time: 0.1505 data: 0.1250 max mem: 9377 +Eval (hcp-train-subset): [78] Total time: 0:00:16 (0.2686 s / it) +Averaged stats (hcp-train-subset): loss: 0.9072 (0.9094) +Eval (hcp-val): [78] [ 0/62] eta: 0:05:02 loss: 0.9213 (0.9213) time: 4.8728 data: 4.8114 max mem: 9377 +Eval (hcp-val): [78] [61/62] eta: 0:00:00 loss: 0.9077 (0.9094) time: 0.1521 data: 0.1266 max mem: 9377 +Eval (hcp-val): [78] Total time: 0:00:16 (0.2655 s / it) +Averaged stats (hcp-val): loss: 0.9077 (0.9094) +Eval (nsd-val): [78] [ 0/62] eta: 0:04:00 loss: 0.9132 (0.9132) time: 3.8871 data: 3.8207 max mem: 9377 +Eval (nsd-val): [78] [61/62] eta: 0:00:00 loss: 0.9162 (0.9194) time: 0.1331 data: 0.1077 max mem: 9377 +Eval (nsd-val): [78] Total time: 0:00:15 (0.2578 s / it) +Averaged stats (nsd-val): loss: 0.9162 (0.9194) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [79] [ 0/6250] eta: 11:43:04 lr: 0.000014 grad: 0.2362 (0.2362) loss: 0.7207 (0.7207) time: 6.7495 data: 6.5702 max mem: 9377 +Train: [79] [ 100/6250] eta: 0:25:40 lr: 0.000014 grad: 0.2764 (0.2967) loss: 0.6759 (0.6798) time: 0.1844 data: 0.0664 max mem: 9377 +Train: [79] [ 200/6250] eta: 0:21:36 lr: 0.000014 grad: 0.2617 (0.2935) loss: 0.6444 (0.6742) time: 0.1634 data: 0.0457 max mem: 9377 +Train: [79] [ 300/6250] eta: 0:20:30 lr: 0.000014 grad: 0.2337 (0.2790) loss: 0.6432 (0.6690) time: 0.2004 data: 0.0969 max mem: 9377 +Train: [79] [ 400/6250] eta: 0:19:24 lr: 0.000014 grad: 0.2189 (0.2724) loss: 0.6548 (0.6657) time: 0.1675 data: 0.0540 max mem: 9377 +Train: [79] [ 500/6250] eta: 0:18:40 lr: 0.000014 grad: 0.2119 (0.2696) loss: 0.6482 (0.6636) time: 0.1754 data: 0.0717 max mem: 9377 +Train: [79] [ 600/6250] eta: 0:17:53 lr: 0.000014 grad: 0.2024 (0.2618) loss: 0.6686 (0.6629) time: 0.1696 data: 0.0794 max mem: 9377 +Train: [79] [ 700/6250] eta: 0:17:11 lr: 0.000014 grad: 0.2053 (0.2563) loss: 0.6394 (0.6612) time: 0.1428 data: 0.0453 max mem: 9377 +Train: [79] [ 800/6250] eta: 0:16:39 lr: 0.000014 grad: 0.1963 (0.2532) loss: 0.6443 (0.6598) time: 0.1803 data: 0.0934 max mem: 9377 +Train: [79] [ 900/6250] eta: 0:16:15 lr: 0.000014 grad: 0.2133 (0.2503) loss: 0.6605 (0.6597) time: 0.1497 data: 0.0589 max mem: 9377 +Train: [79] [1000/6250] eta: 0:15:44 lr: 0.000014 grad: 0.2011 (0.2467) loss: 0.6754 (0.6598) time: 0.1435 data: 0.0592 max mem: 9377 +Train: [79] [1100/6250] eta: 0:15:21 lr: 0.000014 grad: 0.2010 (0.2453) loss: 0.6592 (0.6600) time: 0.1528 data: 0.0660 max mem: 9377 +Train: [79] [1200/6250] eta: 0:14:55 lr: 0.000014 grad: 0.2122 (0.2447) loss: 0.6545 (0.6599) time: 0.1706 data: 0.0777 max mem: 9377 +Train: [79] [1300/6250] eta: 0:14:35 lr: 0.000014 grad: 0.1995 (0.2435) loss: 0.6691 (0.6600) time: 0.1666 data: 0.0681 max mem: 9377 +Train: [79] [1400/6250] eta: 0:14:14 lr: 0.000014 grad: 0.1979 (0.2415) loss: 0.6584 (0.6600) time: 0.1575 data: 0.0763 max mem: 9377 +Train: [79] [1500/6250] eta: 0:13:53 lr: 0.000014 grad: 0.2042 (0.2402) loss: 0.6562 (0.6596) time: 0.1526 data: 0.0651 max mem: 9377 +Train: [79] [1600/6250] eta: 0:13:30 lr: 0.000014 grad: 0.2062 (0.2386) loss: 0.6652 (0.6598) time: 0.1440 data: 0.0456 max mem: 9377 +Train: [79] [1700/6250] eta: 0:13:07 lr: 0.000014 grad: 0.1996 (0.2373) loss: 0.6533 (0.6597) time: 0.1470 data: 0.0591 max mem: 9377 +Train: [79] [1800/6250] eta: 0:12:47 lr: 0.000014 grad: 0.1982 (0.2364) loss: 0.6704 (0.6597) time: 0.1998 data: 0.1177 max mem: 9377 +Train: [79] [1900/6250] eta: 0:12:23 lr: 0.000014 grad: 0.1935 (0.2359) loss: 0.6550 (0.6598) time: 0.1318 data: 0.0344 max mem: 9377 +Train: [79] [2000/6250] eta: 0:12:03 lr: 0.000014 grad: 0.1942 (0.2350) loss: 0.6727 (0.6602) time: 0.1692 data: 0.0874 max mem: 9377 +Train: [79] [2100/6250] eta: 0:11:43 lr: 0.000014 grad: 0.1904 (0.2347) loss: 0.6614 (0.6606) time: 0.1247 data: 0.0352 max mem: 9377 +Train: [79] [2200/6250] eta: 0:11:24 lr: 0.000014 grad: 0.2012 (0.2348) loss: 0.6655 (0.6607) time: 0.1229 data: 0.0331 max mem: 9377 +Train: [79] [2300/6250] eta: 0:11:05 lr: 0.000014 grad: 0.2006 (0.2357) loss: 0.6581 (0.6608) time: 0.1578 data: 0.0677 max mem: 9377 +Train: [79] [2400/6250] eta: 0:10:47 lr: 0.000014 grad: 0.2001 (0.2361) loss: 0.6566 (0.6607) time: 0.1509 data: 0.0582 max mem: 9377 +Train: [79] [2500/6250] eta: 0:10:28 lr: 0.000014 grad: 0.1969 (0.2361) loss: 0.6650 (0.6607) time: 0.1375 data: 0.0519 max mem: 9377 +Train: [79] [2600/6250] eta: 0:10:13 lr: 0.000014 grad: 0.2038 (0.2356) loss: 0.6461 (0.6605) time: 0.1728 data: 0.0845 max mem: 9377 +Train: [79] [2700/6250] eta: 0:09:55 lr: 0.000014 grad: 0.1994 (0.2354) loss: 0.6436 (0.6601) time: 0.1615 data: 0.0814 max mem: 9377 +Train: [79] [2800/6250] eta: 0:09:38 lr: 0.000014 grad: 0.1930 (0.2354) loss: 0.6528 (0.6598) time: 0.1783 data: 0.0951 max mem: 9377 +Train: [79] [2900/6250] eta: 0:09:20 lr: 0.000014 grad: 0.1988 (0.2362) loss: 0.6492 (0.6596) time: 0.1636 data: 0.0760 max mem: 9377 +Train: [79] [3000/6250] eta: 0:09:03 lr: 0.000014 grad: 0.2049 (0.2366) loss: 0.6598 (0.6593) time: 0.1628 data: 0.0870 max mem: 9377 +Train: [79] [3100/6250] eta: 0:08:47 lr: 0.000014 grad: 0.1989 (0.2364) loss: 0.6441 (0.6591) time: 0.1771 data: 0.0930 max mem: 9377 +Train: [79] [3200/6250] eta: 0:08:29 lr: 0.000014 grad: 0.2069 (0.2361) loss: 0.6461 (0.6589) time: 0.1492 data: 0.0596 max mem: 9377 +Train: [79] [3300/6250] eta: 0:08:11 lr: 0.000014 grad: 0.1968 (0.2355) loss: 0.6537 (0.6587) time: 0.1501 data: 0.0591 max mem: 9377 +Train: [79] [3400/6250] eta: 0:07:54 lr: 0.000014 grad: 0.1962 (0.2354) loss: 0.6510 (0.6585) time: 0.1835 data: 0.0791 max mem: 9377 +Train: [79] [3500/6250] eta: 0:07:37 lr: 0.000014 grad: 0.1989 (0.2352) loss: 0.6596 (0.6584) time: 0.1549 data: 0.0672 max mem: 9377 +Train: [79] [3600/6250] eta: 0:07:20 lr: 0.000014 grad: 0.2068 (0.2356) loss: 0.6559 (0.6583) time: 0.1677 data: 0.0792 max mem: 9377 +Train: [79] [3700/6250] eta: 0:07:02 lr: 0.000014 grad: 0.1973 (0.2353) loss: 0.6664 (0.6584) time: 0.1434 data: 0.0530 max mem: 9377 +Train: [79] [3800/6250] eta: 0:06:45 lr: 0.000014 grad: 0.2025 (0.2356) loss: 0.6619 (0.6584) time: 0.1721 data: 0.0793 max mem: 9377 +Train: [79] [3900/6250] eta: 0:06:28 lr: 0.000014 grad: 0.1976 (0.2360) loss: 0.6600 (0.6585) time: 0.1598 data: 0.0757 max mem: 9377 +Train: [79] [4000/6250] eta: 0:06:11 lr: 0.000014 grad: 0.2011 (0.2360) loss: 0.6671 (0.6586) time: 0.1489 data: 0.0619 max mem: 9377 +Train: [79] [4100/6250] eta: 0:05:54 lr: 0.000014 grad: 0.1995 (0.2358) loss: 0.6672 (0.6586) time: 0.1619 data: 0.0723 max mem: 9377 +Train: [79] [4200/6250] eta: 0:05:37 lr: 0.000014 grad: 0.1969 (0.2353) loss: 0.6705 (0.6586) time: 0.1578 data: 0.0773 max mem: 9377 +Train: [79] [4300/6250] eta: 0:05:20 lr: 0.000014 grad: 0.2006 (0.2348) loss: 0.6629 (0.6586) time: 0.1477 data: 0.0584 max mem: 9377 +Train: [79] [4400/6250] eta: 0:05:04 lr: 0.000014 grad: 0.1977 (0.2346) loss: 0.6519 (0.6585) time: 0.2098 data: 0.1258 max mem: 9377 +Train: [79] [4500/6250] eta: 0:04:48 lr: 0.000014 grad: 0.2027 (0.2344) loss: 0.6383 (0.6584) time: 0.1689 data: 0.0774 max mem: 9377 +Train: [79] [4600/6250] eta: 0:04:31 lr: 0.000014 grad: 0.2011 (0.2343) loss: 0.6599 (0.6583) time: 0.1592 data: 0.0762 max mem: 9377 +Train: [79] [4700/6250] eta: 0:04:15 lr: 0.000013 grad: 0.2088 (0.2345) loss: 0.6487 (0.6582) time: 0.1703 data: 0.0809 max mem: 9377 +Train: [79] [4800/6250] eta: 0:03:58 lr: 0.000013 grad: 0.2018 (0.2349) loss: 0.6442 (0.6581) time: 0.1654 data: 0.0667 max mem: 9377 +Train: [79] [4900/6250] eta: 0:03:42 lr: 0.000013 grad: 0.1904 (0.2348) loss: 0.6583 (0.6580) time: 0.1840 data: 0.0940 max mem: 9377 +Train: [79] [5000/6250] eta: 0:03:25 lr: 0.000013 grad: 0.1993 (0.2351) loss: 0.6547 (0.6580) time: 0.1470 data: 0.0565 max mem: 9377 +Train: [79] [5100/6250] eta: 0:03:09 lr: 0.000013 grad: 0.1934 (0.2351) loss: 0.6722 (0.6581) time: 0.1196 data: 0.0210 max mem: 9377 +Train: [79] [5200/6250] eta: 0:02:52 lr: 0.000013 grad: 0.1974 (0.2348) loss: 0.6610 (0.6582) time: 0.1534 data: 0.0508 max mem: 9377 +Train: [79] [5300/6250] eta: 0:02:35 lr: 0.000013 grad: 0.1990 (0.2349) loss: 0.6630 (0.6583) time: 0.1443 data: 0.0517 max mem: 9377 +Train: [79] [5400/6250] eta: 0:02:19 lr: 0.000013 grad: 0.1998 (0.2352) loss: 0.6574 (0.6584) time: 0.1203 data: 0.0292 max mem: 9377 +Train: [79] [5500/6250] eta: 0:02:02 lr: 0.000013 grad: 0.2011 (0.2359) loss: 0.6754 (0.6585) time: 0.1614 data: 0.0715 max mem: 9377 +Train: [79] [5600/6250] eta: 0:01:46 lr: 0.000013 grad: 0.2106 (0.2368) loss: 0.6484 (0.6585) time: 0.1705 data: 0.0752 max mem: 9377 +Train: [79] [5700/6250] eta: 0:01:29 lr: 0.000013 grad: 0.1977 (0.2368) loss: 0.6680 (0.6585) time: 0.1337 data: 0.0403 max mem: 9377 +Train: [79] [5800/6250] eta: 0:01:13 lr: 0.000013 grad: 0.2006 (0.2367) loss: 0.6681 (0.6587) time: 0.1678 data: 0.0730 max mem: 9377 +Train: [79] [5900/6250] eta: 0:00:57 lr: 0.000013 grad: 0.1913 (0.2363) loss: 0.6716 (0.6588) time: 0.1358 data: 0.0347 max mem: 9377 +Train: [79] [6000/6250] eta: 0:00:40 lr: 0.000013 grad: 0.1955 (0.2360) loss: 0.6613 (0.6589) time: 0.1573 data: 0.0670 max mem: 9377 +Train: [79] [6100/6250] eta: 0:00:24 lr: 0.000013 grad: 0.1915 (0.2360) loss: 0.6657 (0.6591) time: 0.1435 data: 0.0445 max mem: 9377 +Train: [79] [6200/6250] eta: 0:00:08 lr: 0.000013 grad: 0.1957 (0.2364) loss: 0.6654 (0.6591) time: 0.1722 data: 0.0828 max mem: 9377 +Train: [79] [6249/6250] eta: 0:00:00 lr: 0.000013 grad: 0.1968 (0.2363) loss: 0.6495 (0.6592) time: 0.1419 data: 0.0572 max mem: 9377 +Train: [79] Total time: 0:17:03 (0.1637 s / it) +Averaged stats: lr: 0.000013 grad: 0.1968 (0.2363) loss: 0.6495 (0.6592) +Eval (hcp-train-subset): [79] [ 0/62] eta: 0:06:15 loss: 0.8962 (0.8962) time: 6.0590 data: 6.0286 max mem: 9377 +Eval (hcp-train-subset): [79] [61/62] eta: 0:00:00 loss: 0.9062 (0.9081) time: 0.1370 data: 0.1104 max mem: 9377 +Eval (hcp-train-subset): [79] Total time: 0:00:15 (0.2427 s / it) +Averaged stats (hcp-train-subset): loss: 0.9062 (0.9081) +Making plots (hcp-train-subset): example=62 +Eval (hcp-val): [79] [ 0/62] eta: 0:04:36 loss: 0.9172 (0.9172) time: 4.4532 data: 4.3682 max mem: 9377 +Eval (hcp-val): [79] [61/62] eta: 0:00:00 loss: 0.9067 (0.9092) time: 0.1223 data: 0.0968 max mem: 9377 +Eval (hcp-val): [79] Total time: 0:00:15 (0.2443 s / it) +Averaged stats (hcp-val): loss: 0.9067 (0.9092) +Making plots (hcp-val): example=2 +Eval (nsd-val): [79] [ 0/62] eta: 0:04:58 loss: 0.9077 (0.9077) time: 4.8116 data: 4.7310 max mem: 9377 +Eval (nsd-val): [79] [61/62] eta: 0:00:00 loss: 0.9090 (0.9127) time: 0.1229 data: 0.0976 max mem: 9377 +Eval (nsd-val): [79] Total time: 0:00:15 (0.2439 s / it) +Averaged stats (nsd-val): loss: 0.9090 (0.9127) +Making plots (nsd-val): example=11 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-00079.pth +Train: [80] [ 0/6250] eta: 10:28:39 lr: 0.000013 grad: 0.1948 (0.1948) loss: 0.7766 (0.7766) time: 6.0351 data: 5.8302 max mem: 9377 +Train: [80] [ 100/6250] eta: 0:22:48 lr: 0.000013 grad: 0.2533 (0.2926) loss: 0.6540 (0.6888) time: 0.1642 data: 0.0571 max mem: 9377 +Train: [80] [ 200/6250] eta: 0:19:55 lr: 0.000013 grad: 0.2532 (0.2826) loss: 0.6424 (0.6692) time: 0.1566 data: 0.0517 max mem: 9377 +Train: [80] [ 300/6250] eta: 0:18:49 lr: 0.000013 grad: 0.2169 (0.2737) loss: 0.6635 (0.6650) time: 0.1611 data: 0.0571 max mem: 9377 +Train: [80] [ 400/6250] eta: 0:18:01 lr: 0.000013 grad: 0.2093 (0.2674) loss: 0.6725 (0.6639) time: 0.1856 data: 0.0910 max mem: 9377 +Train: [80] [ 500/6250] eta: 0:17:11 lr: 0.000013 grad: 0.2006 (0.2606) loss: 0.6580 (0.6631) time: 0.1434 data: 0.0436 max mem: 9377 +Train: [80] [ 600/6250] eta: 0:16:45 lr: 0.000013 grad: 0.1977 (0.2558) loss: 0.6673 (0.6640) time: 0.1750 data: 0.0768 max mem: 9377 +Train: [80] [ 700/6250] eta: 0:16:12 lr: 0.000013 grad: 0.1991 (0.2535) loss: 0.6529 (0.6630) time: 0.1694 data: 0.0739 max mem: 9377 +Train: [80] [ 800/6250] eta: 0:15:56 lr: 0.000013 grad: 0.2090 (0.2511) loss: 0.6605 (0.6631) time: 0.1690 data: 0.0794 max mem: 9377 +Train: [80] [ 900/6250] eta: 0:15:34 lr: 0.000013 grad: 0.2001 (0.2483) loss: 0.6451 (0.6627) time: 0.1700 data: 0.0710 max mem: 9377 +Train: [80] [1000/6250] eta: 0:15:11 lr: 0.000013 grad: 0.1982 (0.2474) loss: 0.6484 (0.6622) time: 0.1835 data: 0.0820 max mem: 9377 +Train: [80] [1100/6250] eta: 0:14:44 lr: 0.000013 grad: 0.1926 (0.2460) loss: 0.6535 (0.6617) time: 0.1640 data: 0.0712 max mem: 9377 +Train: [80] [1200/6250] eta: 0:14:24 lr: 0.000013 grad: 0.1993 (0.2440) loss: 0.6511 (0.6613) time: 0.2176 data: 0.1150 max mem: 9377 +Train: [80] [1300/6250] eta: 0:14:02 lr: 0.000013 grad: 0.1957 (0.2427) loss: 0.6504 (0.6613) time: 0.1535 data: 0.0625 max mem: 9377 +Train: [80] [1400/6250] eta: 0:13:43 lr: 0.000013 grad: 0.2003 (0.2429) loss: 0.6539 (0.6612) time: 0.1534 data: 0.0557 max mem: 9377 +Train: [80] [1500/6250] eta: 0:13:22 lr: 0.000013 grad: 0.1939 (0.2406) loss: 0.6656 (0.6611) time: 0.1623 data: 0.0742 max mem: 9377 +Train: [80] [1600/6250] eta: 0:12:59 lr: 0.000013 grad: 0.1980 (0.2401) loss: 0.6509 (0.6608) time: 0.1563 data: 0.0767 max mem: 9377 +Train: [80] [1700/6250] eta: 0:12:38 lr: 0.000013 grad: 0.2099 (0.2395) loss: 0.6547 (0.6606) time: 0.1606 data: 0.0691 max mem: 9377 +Train: [80] [1800/6250] eta: 0:12:18 lr: 0.000013 grad: 0.1997 (0.2380) loss: 0.6682 (0.6607) time: 0.1595 data: 0.0700 max mem: 9377 +Train: [80] [1900/6250] eta: 0:11:59 lr: 0.000013 grad: 0.1996 (0.2382) loss: 0.6658 (0.6608) time: 0.1138 data: 0.0322 max mem: 9377 +Train: [80] [2000/6250] eta: 0:11:42 lr: 0.000013 grad: 0.1975 (0.2372) loss: 0.6573 (0.6608) time: 0.1521 data: 0.0711 max mem: 9377 +Train: [80] [2100/6250] eta: 0:11:24 lr: 0.000013 grad: 0.2011 (0.2366) loss: 0.6648 (0.6611) time: 0.1562 data: 0.0643 max mem: 9377 +Train: [80] [2200/6250] eta: 0:11:06 lr: 0.000013 grad: 0.1954 (0.2359) loss: 0.6614 (0.6614) time: 0.1509 data: 0.0639 max mem: 9377 +Train: [80] [2300/6250] eta: 0:10:47 lr: 0.000013 grad: 0.1902 (0.2355) loss: 0.6763 (0.6615) time: 0.1405 data: 0.0450 max mem: 9377 +Train: [80] [2400/6250] eta: 0:10:32 lr: 0.000013 grad: 0.1964 (0.2353) loss: 0.6533 (0.6616) time: 0.1911 data: 0.1022 max mem: 9377 +Train: [80] [2500/6250] eta: 0:10:18 lr: 0.000013 grad: 0.1960 (0.2344) loss: 0.6660 (0.6617) time: 0.1334 data: 0.0536 max mem: 9377 +Train: [80] [2600/6250] eta: 0:10:03 lr: 0.000013 grad: 0.1962 (0.2339) loss: 0.6578 (0.6617) time: 0.1871 data: 0.1065 max mem: 9377 +Train: [80] [2700/6250] eta: 0:09:48 lr: 0.000013 grad: 0.1936 (0.2333) loss: 0.6492 (0.6617) time: 0.1631 data: 0.0690 max mem: 9377 +Train: [80] [2800/6250] eta: 0:09:34 lr: 0.000013 grad: 0.1951 (0.2330) loss: 0.6644 (0.6618) time: 0.1646 data: 0.0814 max mem: 9377 +Train: [80] [2900/6250] eta: 0:09:20 lr: 0.000013 grad: 0.1999 (0.2330) loss: 0.6614 (0.6618) time: 0.1848 data: 0.0998 max mem: 9377 +Train: [80] [3000/6250] eta: 0:09:05 lr: 0.000013 grad: 0.1905 (0.2327) loss: 0.6538 (0.6619) time: 0.1904 data: 0.1026 max mem: 9377 +Train: [80] [3100/6250] eta: 0:08:49 lr: 0.000013 grad: 0.2030 (0.2322) loss: 0.6538 (0.6618) time: 0.1600 data: 0.0671 max mem: 9377 +Train: [80] [3200/6250] eta: 0:08:32 lr: 0.000013 grad: 0.1983 (0.2320) loss: 0.6639 (0.6618) time: 0.1693 data: 0.0842 max mem: 9377 +Train: [80] [3300/6250] eta: 0:08:14 lr: 0.000013 grad: 0.1975 (0.2320) loss: 0.6795 (0.6620) time: 0.1626 data: 0.0719 max mem: 9377 +Train: [80] [3400/6250] eta: 0:07:56 lr: 0.000012 grad: 0.1950 (0.2316) loss: 0.6571 (0.6621) time: 0.1547 data: 0.0668 max mem: 9377 +Train: [80] [3500/6250] eta: 0:07:39 lr: 0.000012 grad: 0.1936 (0.2314) loss: 0.6667 (0.6623) time: 0.1670 data: 0.0813 max mem: 9377 +Train: [80] [3600/6250] eta: 0:07:21 lr: 0.000012 grad: 0.1989 (0.2312) loss: 0.6607 (0.6623) time: 0.1358 data: 0.0442 max mem: 9377 +Train: [80] [3700/6250] eta: 0:07:06 lr: 0.000012 grad: 0.1928 (0.2314) loss: 0.6727 (0.6623) time: 0.1724 data: 0.0868 max mem: 9377 +Train: [80] [3800/6250] eta: 0:06:49 lr: 0.000012 grad: 0.2037 (0.2314) loss: 0.6530 (0.6622) time: 0.1456 data: 0.0680 max mem: 9377 +Train: [80] [3900/6250] eta: 0:06:32 lr: 0.000012 grad: 0.1956 (0.2310) loss: 0.6612 (0.6622) time: 0.1824 data: 0.0977 max mem: 9377 +Train: [80] [4000/6250] eta: 0:06:15 lr: 0.000012 grad: 0.1984 (0.2304) loss: 0.6617 (0.6623) time: 0.1744 data: 0.0917 max mem: 9377 +Train: [80] [4100/6250] eta: 0:05:58 lr: 0.000012 grad: 0.1965 (0.2301) loss: 0.6463 (0.6623) time: 0.1394 data: 0.0612 max mem: 9377 +Train: [80] [4200/6250] eta: 0:05:41 lr: 0.000012 grad: 0.1936 (0.2296) loss: 0.6606 (0.6622) time: 0.1189 data: 0.0272 max mem: 9377 +Train: [80] [4300/6250] eta: 0:05:24 lr: 0.000012 grad: 0.1982 (0.2290) loss: 0.6465 (0.6621) time: 0.1723 data: 0.0889 max mem: 9377 +Train: [80] [4400/6250] eta: 0:05:08 lr: 0.000012 grad: 0.1991 (0.2286) loss: 0.6591 (0.6621) time: 0.1662 data: 0.0850 max mem: 9377 +Train: [80] [4500/6250] eta: 0:04:52 lr: 0.000012 grad: 0.1929 (0.2285) loss: 0.6614 (0.6621) time: 0.1983 data: 0.1147 max mem: 9377 +Train: [80] [4600/6250] eta: 0:04:36 lr: 0.000012 grad: 0.1945 (0.2282) loss: 0.6642 (0.6622) time: 0.1798 data: 0.0972 max mem: 9377 +Train: [80] [4700/6250] eta: 0:04:19 lr: 0.000012 grad: 0.2015 (0.2281) loss: 0.6601 (0.6623) time: 0.1293 data: 0.0373 max mem: 9377 +Train: [80] [4800/6250] eta: 0:04:03 lr: 0.000012 grad: 0.1935 (0.2283) loss: 0.6632 (0.6624) time: 0.1474 data: 0.0545 max mem: 9377 +Train: [80] [4900/6250] eta: 0:03:47 lr: 0.000012 grad: 0.1968 (0.2288) loss: 0.6728 (0.6624) time: 0.2053 data: 0.1126 max mem: 9377 +Train: [80] [5000/6250] eta: 0:03:30 lr: 0.000012 grad: 0.1961 (0.2287) loss: 0.6696 (0.6626) time: 0.1546 data: 0.0628 max mem: 9377 +Train: [80] [5100/6250] eta: 0:03:13 lr: 0.000012 grad: 0.1948 (0.2284) loss: 0.6743 (0.6627) time: 0.1657 data: 0.0790 max mem: 9377 +Train: [80] [5200/6250] eta: 0:02:56 lr: 0.000012 grad: 0.1991 (0.2287) loss: 0.6597 (0.6627) time: 0.1748 data: 0.0872 max mem: 9377 +Train: [80] [5300/6250] eta: 0:02:39 lr: 0.000012 grad: 0.1938 (0.2286) loss: 0.6604 (0.6627) time: 0.1686 data: 0.0846 max mem: 9377 +Train: [80] [5400/6250] eta: 0:02:22 lr: 0.000012 grad: 0.1956 (0.2285) loss: 0.6638 (0.6627) time: 0.1483 data: 0.0531 max mem: 9377 +Train: [80] [5500/6250] eta: 0:02:05 lr: 0.000012 grad: 0.1993 (0.2282) loss: 0.6525 (0.6627) time: 0.1671 data: 0.0765 max mem: 9377 +Train: [80] [5600/6250] eta: 0:01:49 lr: 0.000012 grad: 0.1906 (0.2280) loss: 0.6705 (0.6628) time: 0.1769 data: 0.0905 max mem: 9377 +Train: [80] [5700/6250] eta: 0:01:32 lr: 0.000012 grad: 0.2002 (0.2278) loss: 0.6468 (0.6628) time: 0.1920 data: 0.1064 max mem: 9377 +Train: [80] [5800/6250] eta: 0:01:15 lr: 0.000012 grad: 0.1976 (0.2275) loss: 0.6539 (0.6628) time: 0.1710 data: 0.0862 max mem: 9377 +Train: [80] [5900/6250] eta: 0:00:58 lr: 0.000012 grad: 0.1992 (0.2276) loss: 0.6618 (0.6628) time: 0.1334 data: 0.0553 max mem: 9377 +Train: [80] [6000/6250] eta: 0:00:41 lr: 0.000012 grad: 0.2000 (0.2275) loss: 0.6573 (0.6627) time: 0.1672 data: 0.0816 max mem: 9377 +Train: [80] [6100/6250] eta: 0:00:25 lr: 0.000012 grad: 0.1977 (0.2275) loss: 0.6559 (0.6626) time: 0.1544 data: 0.0723 max mem: 9377 +Train: [80] [6200/6250] eta: 0:00:08 lr: 0.000012 grad: 0.2032 (0.2273) loss: 0.6578 (0.6625) time: 0.1761 data: 0.0788 max mem: 9377 +Train: [80] [6249/6250] eta: 0:00:00 lr: 0.000012 grad: 0.1983 (0.2275) loss: 0.6581 (0.6625) time: 0.1718 data: 0.0869 max mem: 9377 +Train: [80] Total time: 0:17:30 (0.1680 s / it) +Averaged stats: lr: 0.000012 grad: 0.1983 (0.2275) loss: 0.6581 (0.6625) +Eval (hcp-train-subset): [80] [ 0/62] eta: 0:05:27 loss: 0.8936 (0.8936) time: 5.2804 data: 5.2392 max mem: 9377 +Eval (hcp-train-subset): [80] [61/62] eta: 0:00:00 loss: 0.9092 (0.9092) time: 0.1501 data: 0.1244 max mem: 9377 +Eval (hcp-train-subset): [80] Total time: 0:00:15 (0.2458 s / it) +Averaged stats (hcp-train-subset): loss: 0.9092 (0.9092) +Eval (hcp-val): [80] [ 0/62] eta: 0:06:24 loss: 0.9159 (0.9159) time: 6.2063 data: 6.1753 max mem: 9377 +Eval (hcp-val): [80] [61/62] eta: 0:00:00 loss: 0.9095 (0.9107) time: 0.1535 data: 0.1259 max mem: 9377 +Eval (hcp-val): [80] Total time: 0:00:15 (0.2459 s / it) +Averaged stats (hcp-val): loss: 0.9095 (0.9107) +Eval (nsd-val): [80] [ 0/62] eta: 0:05:34 loss: 0.9044 (0.9044) time: 5.4012 data: 5.3711 max mem: 9377 +Eval (nsd-val): [80] [61/62] eta: 0:00:00 loss: 0.9071 (0.9099) time: 0.1418 data: 0.1146 max mem: 9377 +Eval (nsd-val): [80] Total time: 0:00:15 (0.2463 s / it) +Averaged stats (nsd-val): loss: 0.9071 (0.9099) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [81] [ 0/6250] eta: 12:51:40 lr: 0.000012 grad: 0.2584 (0.2584) loss: 0.7091 (0.7091) time: 7.4080 data: 7.2702 max mem: 9377 +Train: [81] [ 100/6250] eta: 0:23:44 lr: 0.000012 grad: 0.2424 (0.2869) loss: 0.6397 (0.6508) time: 0.1660 data: 0.0618 max mem: 9377 +Train: [81] [ 200/6250] eta: 0:20:25 lr: 0.000012 grad: 0.2370 (0.2737) loss: 0.6641 (0.6544) time: 0.1623 data: 0.0592 max mem: 9377 +Train: [81] [ 300/6250] eta: 0:18:54 lr: 0.000012 grad: 0.2262 (0.2880) loss: 0.6657 (0.6579) time: 0.1559 data: 0.0494 max mem: 9377 +Train: [81] [ 400/6250] eta: 0:18:12 lr: 0.000012 grad: 0.2165 (0.2784) loss: 0.6656 (0.6571) time: 0.1708 data: 0.0687 max mem: 9377 +Train: [81] [ 500/6250] eta: 0:17:36 lr: 0.000012 grad: 0.2169 (0.2736) loss: 0.6666 (0.6586) time: 0.1753 data: 0.0765 max mem: 9377 +Train: [81] [ 600/6250] eta: 0:16:50 lr: 0.000012 grad: 0.1994 (0.2670) loss: 0.6610 (0.6603) time: 0.1522 data: 0.0544 max mem: 9377 +Train: [81] [ 700/6250] eta: 0:16:22 lr: 0.000012 grad: 0.1998 (0.2661) loss: 0.6695 (0.6616) time: 0.1847 data: 0.0904 max mem: 9377 +Train: [81] [ 800/6250] eta: 0:15:54 lr: 0.000012 grad: 0.1956 (0.2671) loss: 0.6675 (0.6618) time: 0.1542 data: 0.0610 max mem: 9377 +Train: [81] [ 900/6250] eta: 0:15:36 lr: 0.000012 grad: 0.1966 (0.2619) loss: 0.6684 (0.6626) time: 0.1641 data: 0.0718 max mem: 9377 +Train: [81] [1000/6250] eta: 0:15:10 lr: 0.000012 grad: 0.1981 (0.2589) loss: 0.6675 (0.6632) time: 0.1478 data: 0.0609 max mem: 9377 +Train: [81] [1100/6250] eta: 0:14:49 lr: 0.000012 grad: 0.1977 (0.2576) loss: 0.6607 (0.6637) time: 0.1625 data: 0.0726 max mem: 9377 +Train: [81] [1200/6250] eta: 0:14:26 lr: 0.000012 grad: 0.1971 (0.2572) loss: 0.6686 (0.6639) time: 0.1638 data: 0.0719 max mem: 9377 +Train: [81] [1300/6250] eta: 0:14:05 lr: 0.000012 grad: 0.1937 (0.2542) loss: 0.6721 (0.6645) time: 0.1569 data: 0.0617 max mem: 9377 +Train: [81] [1400/6250] eta: 0:13:45 lr: 0.000012 grad: 0.1928 (0.2540) loss: 0.6645 (0.6649) time: 0.1635 data: 0.0693 max mem: 9377 +Train: [81] [1500/6250] eta: 0:13:22 lr: 0.000012 grad: 0.1993 (0.2533) loss: 0.6723 (0.6651) time: 0.1581 data: 0.0626 max mem: 9377 +Train: [81] [1600/6250] eta: 0:13:03 lr: 0.000012 grad: 0.2002 (0.2543) loss: 0.6501 (0.6650) time: 0.1723 data: 0.0817 max mem: 9377 +Train: [81] [1700/6250] eta: 0:12:40 lr: 0.000012 grad: 0.1960 (0.2532) loss: 0.6649 (0.6651) time: 0.1458 data: 0.0526 max mem: 9377 +Train: [81] [1800/6250] eta: 0:12:18 lr: 0.000012 grad: 0.2015 (0.2523) loss: 0.6558 (0.6652) time: 0.1353 data: 0.0391 max mem: 9377 +Train: [81] [1900/6250] eta: 0:11:58 lr: 0.000012 grad: 0.1920 (0.2502) loss: 0.6689 (0.6653) time: 0.1729 data: 0.0810 max mem: 9377 +Train: [81] [2000/6250] eta: 0:11:39 lr: 0.000012 grad: 0.2077 (0.2489) loss: 0.6552 (0.6654) time: 0.1609 data: 0.0676 max mem: 9377 +Train: [81] [2100/6250] eta: 0:11:21 lr: 0.000012 grad: 0.1904 (0.2473) loss: 0.6782 (0.6654) time: 0.1365 data: 0.0407 max mem: 9377 +Train: [81] [2200/6250] eta: 0:11:02 lr: 0.000012 grad: 0.1952 (0.2457) loss: 0.6569 (0.6654) time: 0.1382 data: 0.0451 max mem: 9377 +Train: [81] [2300/6250] eta: 0:10:44 lr: 0.000011 grad: 0.1922 (0.2448) loss: 0.6613 (0.6655) time: 0.1468 data: 0.0628 max mem: 9377 +Train: [81] [2400/6250] eta: 0:10:28 lr: 0.000011 grad: 0.1947 (0.2434) loss: 0.6737 (0.6657) time: 0.1901 data: 0.1048 max mem: 9377 +Train: [81] [2500/6250] eta: 0:10:09 lr: 0.000011 grad: 0.1912 (0.2428) loss: 0.6699 (0.6660) time: 0.1728 data: 0.0802 max mem: 9377 +Train: [81] [2600/6250] eta: 0:09:54 lr: 0.000011 grad: 0.1903 (0.2421) loss: 0.6736 (0.6660) time: 0.1682 data: 0.0899 max mem: 9377 +Train: [81] [2700/6250] eta: 0:09:38 lr: 0.000011 grad: 0.1939 (0.2411) loss: 0.6630 (0.6660) time: 0.1873 data: 0.1036 max mem: 9377 +Train: [81] [2800/6250] eta: 0:09:22 lr: 0.000011 grad: 0.1961 (0.2405) loss: 0.6579 (0.6661) time: 0.1864 data: 0.0991 max mem: 9377 +Train: [81] [2900/6250] eta: 0:09:06 lr: 0.000011 grad: 0.1904 (0.2402) loss: 0.6701 (0.6662) time: 0.1592 data: 0.0720 max mem: 9377 +Train: [81] [3000/6250] eta: 0:08:48 lr: 0.000011 grad: 0.2036 (0.2403) loss: 0.6487 (0.6662) time: 0.1440 data: 0.0555 max mem: 9377 +Train: [81] [3100/6250] eta: 0:08:33 lr: 0.000011 grad: 0.1938 (0.2403) loss: 0.6673 (0.6662) time: 0.1846 data: 0.0991 max mem: 9377 +Train: [81] [3200/6250] eta: 0:08:17 lr: 0.000011 grad: 0.1977 (0.2398) loss: 0.6738 (0.6663) time: 0.1660 data: 0.0714 max mem: 9377 +Train: [81] [3300/6250] eta: 0:08:00 lr: 0.000011 grad: 0.2003 (0.2394) loss: 0.6586 (0.6663) time: 0.1624 data: 0.0673 max mem: 9377 +Train: [81] [3400/6250] eta: 0:07:43 lr: 0.000011 grad: 0.1943 (0.2393) loss: 0.6713 (0.6664) time: 0.1572 data: 0.0576 max mem: 9377 +Train: [81] [3500/6250] eta: 0:07:26 lr: 0.000011 grad: 0.1946 (0.2395) loss: 0.6626 (0.6664) time: 0.1481 data: 0.0514 max mem: 9377 +Train: [81] [3600/6250] eta: 0:07:09 lr: 0.000011 grad: 0.2001 (0.2390) loss: 0.6549 (0.6663) time: 0.1385 data: 0.0412 max mem: 9377 +Train: [81] [3700/6250] eta: 0:06:51 lr: 0.000011 grad: 0.1926 (0.2385) loss: 0.6724 (0.6663) time: 0.1398 data: 0.0484 max mem: 9377 +Train: [81] [3800/6250] eta: 0:06:35 lr: 0.000011 grad: 0.1988 (0.2385) loss: 0.6651 (0.6664) time: 0.1661 data: 0.0780 max mem: 9377 +Train: [81] [3900/6250] eta: 0:06:19 lr: 0.000011 grad: 0.1929 (0.2381) loss: 0.6713 (0.6664) time: 0.1552 data: 0.0667 max mem: 9377 +Train: [81] [4000/6250] eta: 0:06:03 lr: 0.000011 grad: 0.1932 (0.2379) loss: 0.6690 (0.6663) time: 0.1795 data: 0.0931 max mem: 9377 +Train: [81] [4100/6250] eta: 0:05:47 lr: 0.000011 grad: 0.2014 (0.2373) loss: 0.6513 (0.6662) time: 0.1574 data: 0.0667 max mem: 9377 +Train: [81] [4200/6250] eta: 0:05:30 lr: 0.000011 grad: 0.1990 (0.2369) loss: 0.6613 (0.6662) time: 0.1468 data: 0.0624 max mem: 9377 +Train: [81] [4300/6250] eta: 0:05:14 lr: 0.000011 grad: 0.2004 (0.2367) loss: 0.6840 (0.6663) time: 0.1638 data: 0.0727 max mem: 9377 +Train: [81] [4400/6250] eta: 0:04:57 lr: 0.000011 grad: 0.1985 (0.2363) loss: 0.6743 (0.6664) time: 0.1635 data: 0.0736 max mem: 9377 +Train: [81] [4500/6250] eta: 0:04:41 lr: 0.000011 grad: 0.2096 (0.2367) loss: 0.6759 (0.6665) time: 0.1414 data: 0.0495 max mem: 9377 +Train: [81] [4600/6250] eta: 0:04:25 lr: 0.000011 grad: 0.2004 (0.2364) loss: 0.6662 (0.6665) time: 0.1420 data: 0.0492 max mem: 9377 +Train: [81] [4700/6250] eta: 0:04:09 lr: 0.000011 grad: 0.2009 (0.2366) loss: 0.6679 (0.6667) time: 0.1583 data: 0.0750 max mem: 9377 +Train: [81] [4800/6250] eta: 0:03:54 lr: 0.000011 grad: 0.1951 (0.2364) loss: 0.6657 (0.6668) time: 0.1535 data: 0.0706 max mem: 9377 +Train: [81] [4900/6250] eta: 0:03:38 lr: 0.000011 grad: 0.2044 (0.2360) loss: 0.6565 (0.6667) time: 0.1764 data: 0.0858 max mem: 9377 +Train: [81] [5000/6250] eta: 0:03:22 lr: 0.000011 grad: 0.1989 (0.2358) loss: 0.6633 (0.6667) time: 0.1589 data: 0.0689 max mem: 9377 +Train: [81] [5100/6250] eta: 0:03:06 lr: 0.000011 grad: 0.1971 (0.2359) loss: 0.6697 (0.6667) time: 0.1752 data: 0.0817 max mem: 9377 +Train: [81] [5200/6250] eta: 0:02:50 lr: 0.000011 grad: 0.1954 (0.2356) loss: 0.6519 (0.6667) time: 0.1524 data: 0.0640 max mem: 9377 +Train: [81] [5300/6250] eta: 0:02:33 lr: 0.000011 grad: 0.1989 (0.2354) loss: 0.6572 (0.6666) time: 0.1410 data: 0.0438 max mem: 9377 +Train: [81] [5400/6250] eta: 0:02:17 lr: 0.000011 grad: 0.1948 (0.2349) loss: 0.6598 (0.6665) time: 0.1401 data: 0.0429 max mem: 9377 +Train: [81] [5500/6250] eta: 0:02:01 lr: 0.000011 grad: 0.2010 (0.2353) loss: 0.6542 (0.6663) time: 0.1631 data: 0.0708 max mem: 9377 +Train: [81] [5600/6250] eta: 0:01:45 lr: 0.000011 grad: 0.1974 (0.2351) loss: 0.6414 (0.6662) time: 0.1533 data: 0.0584 max mem: 9377 +Train: [81] [5700/6250] eta: 0:01:28 lr: 0.000011 grad: 0.1922 (0.2346) loss: 0.6682 (0.6661) time: 0.1416 data: 0.0482 max mem: 9377 +Train: [81] [5800/6250] eta: 0:01:12 lr: 0.000011 grad: 0.1951 (0.2346) loss: 0.6640 (0.6660) time: 0.1639 data: 0.0795 max mem: 9377 +Train: [81] [5900/6250] eta: 0:00:56 lr: 0.000011 grad: 0.1970 (0.2345) loss: 0.6742 (0.6659) time: 0.1610 data: 0.0734 max mem: 9377 +Train: [81] [6000/6250] eta: 0:00:40 lr: 0.000011 grad: 0.1972 (0.2344) loss: 0.6583 (0.6659) time: 0.1412 data: 0.0428 max mem: 9377 +Train: [81] [6100/6250] eta: 0:00:24 lr: 0.000011 grad: 0.1955 (0.2345) loss: 0.6767 (0.6658) time: 0.1614 data: 0.0788 max mem: 9377 +Train: [81] [6200/6250] eta: 0:00:08 lr: 0.000011 grad: 0.1900 (0.2340) loss: 0.6674 (0.6658) time: 0.1701 data: 0.0837 max mem: 9377 +Train: [81] [6249/6250] eta: 0:00:00 lr: 0.000011 grad: 0.1998 (0.2339) loss: 0.6453 (0.6657) time: 0.1450 data: 0.0517 max mem: 9377 +Train: [81] Total time: 0:16:52 (0.1620 s / it) +Averaged stats: lr: 0.000011 grad: 0.1998 (0.2339) loss: 0.6453 (0.6657) +Eval (hcp-train-subset): [81] [ 0/62] eta: 0:06:03 loss: 0.8980 (0.8980) time: 5.8660 data: 5.8357 max mem: 9377 +Eval (hcp-train-subset): [81] [61/62] eta: 0:00:00 loss: 0.9108 (0.9101) time: 0.1542 data: 0.1291 max mem: 9377 +Eval (hcp-train-subset): [81] Total time: 0:00:15 (0.2466 s / it) +Averaged stats (hcp-train-subset): loss: 0.9108 (0.9101) +Eval (hcp-val): [81] [ 0/62] eta: 0:06:26 loss: 0.9060 (0.9060) time: 6.2287 data: 6.1972 max mem: 9377 +Eval (hcp-val): [81] [61/62] eta: 0:00:00 loss: 0.9105 (0.9090) time: 0.1369 data: 0.1119 max mem: 9377 +Eval (hcp-val): [81] Total time: 0:00:15 (0.2441 s / it) +Averaged stats (hcp-val): loss: 0.9105 (0.9090) +Eval (nsd-val): [81] [ 0/62] eta: 0:03:48 loss: 0.9102 (0.9102) time: 3.6928 data: 3.6141 max mem: 9377 +Eval (nsd-val): [81] [61/62] eta: 0:00:00 loss: 0.9088 (0.9122) time: 0.1231 data: 0.0978 max mem: 9377 +Eval (nsd-val): [81] Total time: 0:00:14 (0.2331 s / it) +Averaged stats (nsd-val): loss: 0.9088 (0.9122) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [82] [ 0/6250] eta: 11:21:01 lr: 0.000011 grad: 0.1755 (0.1755) loss: 0.7527 (0.7527) time: 6.5379 data: 6.3946 max mem: 9377 +Train: [82] [ 100/6250] eta: 0:22:30 lr: 0.000011 grad: 0.2438 (0.2921) loss: 0.6597 (0.6583) time: 0.1685 data: 0.0613 max mem: 9377 +Train: [82] [ 200/6250] eta: 0:19:32 lr: 0.000011 grad: 0.2150 (0.2809) loss: 0.6628 (0.6487) time: 0.1347 data: 0.0340 max mem: 9377 +Train: [82] [ 300/6250] eta: 0:18:27 lr: 0.000011 grad: 0.2180 (0.2726) loss: 0.6498 (0.6507) time: 0.1790 data: 0.0893 max mem: 9377 +Train: [82] [ 400/6250] eta: 0:17:28 lr: 0.000011 grad: 0.2045 (0.2638) loss: 0.6668 (0.6555) time: 0.1305 data: 0.0279 max mem: 9377 +Train: [82] [ 500/6250] eta: 0:16:46 lr: 0.000011 grad: 0.1957 (0.2578) loss: 0.6582 (0.6578) time: 0.1502 data: 0.0497 max mem: 9377 +Train: [82] [ 600/6250] eta: 0:16:11 lr: 0.000011 grad: 0.2035 (0.2525) loss: 0.6778 (0.6590) time: 0.1503 data: 0.0598 max mem: 9377 +Train: [82] [ 700/6250] eta: 0:15:38 lr: 0.000011 grad: 0.1988 (0.2552) loss: 0.6381 (0.6586) time: 0.1331 data: 0.0377 max mem: 9377 +Train: [82] [ 800/6250] eta: 0:15:11 lr: 0.000011 grad: 0.2033 (0.2522) loss: 0.6532 (0.6582) time: 0.1514 data: 0.0518 max mem: 9377 +Train: [82] [ 900/6250] eta: 0:14:53 lr: 0.000011 grad: 0.2035 (0.2479) loss: 0.6602 (0.6575) time: 0.1476 data: 0.0639 max mem: 9377 +Train: [82] [1000/6250] eta: 0:14:32 lr: 0.000011 grad: 0.2043 (0.2456) loss: 0.6666 (0.6579) time: 0.1492 data: 0.0547 max mem: 9377 +Train: [82] [1100/6250] eta: 0:14:15 lr: 0.000011 grad: 0.2027 (0.2440) loss: 0.6511 (0.6580) time: 0.1795 data: 0.0894 max mem: 9377 +Train: [82] [1200/6250] eta: 0:13:55 lr: 0.000011 grad: 0.1946 (0.2443) loss: 0.6673 (0.6581) time: 0.1442 data: 0.0553 max mem: 9377 +Train: [82] [1300/6250] eta: 0:13:45 lr: 0.000011 grad: 0.1972 (0.2437) loss: 0.6645 (0.6587) time: 0.1686 data: 0.0760 max mem: 9377 +Train: [82] [1400/6250] eta: 0:13:34 lr: 0.000010 grad: 0.1976 (0.2429) loss: 0.6504 (0.6589) time: 0.1840 data: 0.0919 max mem: 9377 +Train: [82] [1500/6250] eta: 0:13:17 lr: 0.000010 grad: 0.2055 (0.2438) loss: 0.6618 (0.6590) time: 0.1786 data: 0.0835 max mem: 9377 +Train: [82] [1600/6250] eta: 0:13:00 lr: 0.000010 grad: 0.1990 (0.2429) loss: 0.6590 (0.6591) time: 0.1530 data: 0.0561 max mem: 9377 +Train: [82] [1700/6250] eta: 0:12:41 lr: 0.000010 grad: 0.2062 (0.2420) loss: 0.6613 (0.6593) time: 0.1654 data: 0.0727 max mem: 9377 +Train: [82] [1800/6250] eta: 0:12:22 lr: 0.000010 grad: 0.2055 (0.2417) loss: 0.6747 (0.6595) time: 0.1536 data: 0.0596 max mem: 9377 +Train: [82] [1900/6250] eta: 0:12:03 lr: 0.000010 grad: 0.1985 (0.2413) loss: 0.6770 (0.6597) time: 0.1613 data: 0.0723 max mem: 9377 +Train: [82] [2000/6250] eta: 0:11:47 lr: 0.000010 grad: 0.1944 (0.2396) loss: 0.6582 (0.6597) time: 0.2032 data: 0.1099 max mem: 9377 +Train: [82] [2100/6250] eta: 0:11:29 lr: 0.000010 grad: 0.2043 (0.2397) loss: 0.6562 (0.6597) time: 0.1713 data: 0.0853 max mem: 9377 +Train: [82] [2200/6250] eta: 0:11:13 lr: 0.000010 grad: 0.1975 (0.2390) loss: 0.6554 (0.6598) time: 0.1786 data: 0.1007 max mem: 9377 +Train: [82] [2300/6250] eta: 0:10:57 lr: 0.000010 grad: 0.2014 (0.2404) loss: 0.6692 (0.6600) time: 0.1409 data: 0.0614 max mem: 9377 +Train: [82] [2400/6250] eta: 0:10:41 lr: 0.000010 grad: 0.1981 (0.2401) loss: 0.6601 (0.6601) time: 0.1703 data: 0.0925 max mem: 9377 +Train: [82] [2500/6250] eta: 0:10:22 lr: 0.000010 grad: 0.1954 (0.2412) loss: 0.6696 (0.6602) time: 0.1482 data: 0.0654 max mem: 9377 +Train: [82] [2600/6250] eta: 0:10:08 lr: 0.000010 grad: 0.1979 (0.2403) loss: 0.6557 (0.6603) time: 0.2679 data: 0.1886 max mem: 9377 +Train: [82] [2700/6250] eta: 0:09:49 lr: 0.000010 grad: 0.2037 (0.2408) loss: 0.6614 (0.6604) time: 0.1426 data: 0.0614 max mem: 9377 +Train: [82] [2800/6250] eta: 0:09:32 lr: 0.000010 grad: 0.1987 (0.2402) loss: 0.6742 (0.6605) time: 0.1354 data: 0.0583 max mem: 9377 +Train: [82] [2900/6250] eta: 0:09:14 lr: 0.000010 grad: 0.2031 (0.2401) loss: 0.6526 (0.6604) time: 0.1554 data: 0.0714 max mem: 9377 +Train: [82] [3000/6250] eta: 0:08:58 lr: 0.000010 grad: 0.1984 (0.2403) loss: 0.6558 (0.6603) time: 0.1740 data: 0.0821 max mem: 9377 +Train: [82] [3100/6250] eta: 0:08:40 lr: 0.000010 grad: 0.2029 (0.2403) loss: 0.6536 (0.6604) time: 0.1609 data: 0.0712 max mem: 9377 +Train: [82] [3200/6250] eta: 0:08:23 lr: 0.000010 grad: 0.2021 (0.2398) loss: 0.6542 (0.6602) time: 0.1545 data: 0.0571 max mem: 9377 +Train: [82] [3300/6250] eta: 0:08:07 lr: 0.000010 grad: 0.1986 (0.2407) loss: 0.6491 (0.6599) time: 0.1560 data: 0.0547 max mem: 9377 +Train: [82] [3400/6250] eta: 0:07:51 lr: 0.000010 grad: 0.2075 (0.2404) loss: 0.6413 (0.6597) time: 0.1659 data: 0.0783 max mem: 9377 +Train: [82] [3500/6250] eta: 0:07:33 lr: 0.000010 grad: 0.2071 (0.2400) loss: 0.6655 (0.6595) time: 0.1681 data: 0.0792 max mem: 9377 +Train: [82] [3600/6250] eta: 0:07:16 lr: 0.000010 grad: 0.1953 (0.2395) loss: 0.6701 (0.6595) time: 0.1438 data: 0.0523 max mem: 9377 +Train: [82] [3700/6250] eta: 0:06:59 lr: 0.000010 grad: 0.2052 (0.2392) loss: 0.6502 (0.6594) time: 0.1745 data: 0.0935 max mem: 9377 +Train: [82] [3800/6250] eta: 0:06:42 lr: 0.000010 grad: 0.1962 (0.2395) loss: 0.6584 (0.6592) time: 0.1524 data: 0.0620 max mem: 9377 +Train: [82] [3900/6250] eta: 0:06:25 lr: 0.000010 grad: 0.1999 (0.2394) loss: 0.6247 (0.6592) time: 0.1345 data: 0.0419 max mem: 9377 +Train: [82] [4000/6250] eta: 0:06:08 lr: 0.000010 grad: 0.2013 (0.2391) loss: 0.6565 (0.6592) time: 0.1459 data: 0.0602 max mem: 9377 +Train: [82] [4100/6250] eta: 0:05:51 lr: 0.000010 grad: 0.1962 (0.2388) loss: 0.6614 (0.6593) time: 0.1513 data: 0.0604 max mem: 9377 +Train: [82] [4200/6250] eta: 0:05:35 lr: 0.000010 grad: 0.2009 (0.2385) loss: 0.6559 (0.6593) time: 0.1818 data: 0.1054 max mem: 9377 +Train: [82] [4300/6250] eta: 0:05:19 lr: 0.000010 grad: 0.1997 (0.2382) loss: 0.6627 (0.6593) time: 0.1277 data: 0.0395 max mem: 9377 +Train: [82] [4400/6250] eta: 0:05:02 lr: 0.000010 grad: 0.1923 (0.2378) loss: 0.6596 (0.6593) time: 0.1602 data: 0.0614 max mem: 9377 +Train: [82] [4500/6250] eta: 0:04:45 lr: 0.000010 grad: 0.2026 (0.2376) loss: 0.6530 (0.6591) time: 0.1811 data: 0.0969 max mem: 9377 +Train: [82] [4600/6250] eta: 0:04:29 lr: 0.000010 grad: 0.1953 (0.2371) loss: 0.6481 (0.6591) time: 0.1812 data: 0.1059 max mem: 9377 +Train: [82] [4700/6250] eta: 0:04:12 lr: 0.000010 grad: 0.1996 (0.2365) loss: 0.6533 (0.6589) time: 0.1567 data: 0.0717 max mem: 9377 +Train: [82] [4800/6250] eta: 0:03:56 lr: 0.000010 grad: 0.2012 (0.2362) loss: 0.6665 (0.6588) time: 0.1790 data: 0.0961 max mem: 9377 +Train: [82] [4900/6250] eta: 0:03:40 lr: 0.000010 grad: 0.1987 (0.2363) loss: 0.6516 (0.6588) time: 0.1442 data: 0.0613 max mem: 9377 +Train: [82] [5000/6250] eta: 0:03:23 lr: 0.000010 grad: 0.1944 (0.2367) loss: 0.6634 (0.6587) time: 0.1460 data: 0.0538 max mem: 9377 +Train: [82] [5100/6250] eta: 0:03:07 lr: 0.000010 grad: 0.2021 (0.2366) loss: 0.6446 (0.6587) time: 0.1572 data: 0.0774 max mem: 9377 +Train: [82] [5200/6250] eta: 0:02:51 lr: 0.000010 grad: 0.1958 (0.2368) loss: 0.6519 (0.6585) time: 0.1349 data: 0.0488 max mem: 9377 +Train: [82] [5300/6250] eta: 0:02:34 lr: 0.000010 grad: 0.2141 (0.2372) loss: 0.6428 (0.6583) time: 0.1493 data: 0.0645 max mem: 9377 +Train: [82] [5400/6250] eta: 0:02:18 lr: 0.000010 grad: 0.1956 (0.2370) loss: 0.6515 (0.6582) time: 0.1590 data: 0.0562 max mem: 9377 +Train: [82] [5500/6250] eta: 0:02:02 lr: 0.000010 grad: 0.2085 (0.2372) loss: 0.6602 (0.6581) time: 0.1397 data: 0.0339 max mem: 9377 +Train: [82] [5600/6250] eta: 0:01:45 lr: 0.000010 grad: 0.2083 (0.2375) loss: 0.6425 (0.6580) time: 0.1453 data: 0.0432 max mem: 9377 +Train: [82] [5700/6250] eta: 0:01:29 lr: 0.000010 grad: 0.1946 (0.2374) loss: 0.6634 (0.6579) time: 0.1634 data: 0.0706 max mem: 9377 +Train: [82] [5800/6250] eta: 0:01:13 lr: 0.000010 grad: 0.1979 (0.2375) loss: 0.6589 (0.6578) time: 0.1738 data: 0.0821 max mem: 9377 +Train: [82] [5900/6250] eta: 0:00:56 lr: 0.000010 grad: 0.2025 (0.2374) loss: 0.6721 (0.6578) time: 0.1284 data: 0.0400 max mem: 9377 +Train: [82] [6000/6250] eta: 0:00:40 lr: 0.000010 grad: 0.2016 (0.2376) loss: 0.6621 (0.6577) time: 0.1515 data: 0.0650 max mem: 9377 +Train: [82] [6100/6250] eta: 0:00:24 lr: 0.000010 grad: 0.1982 (0.2379) loss: 0.6537 (0.6578) time: 0.1685 data: 0.0840 max mem: 9377 +Train: [82] [6200/6250] eta: 0:00:08 lr: 0.000010 grad: 0.2002 (0.2381) loss: 0.6340 (0.6578) time: 0.1461 data: 0.0613 max mem: 9377 +Train: [82] [6249/6250] eta: 0:00:00 lr: 0.000010 grad: 0.1963 (0.2381) loss: 0.6448 (0.6577) time: 0.1666 data: 0.0797 max mem: 9377 +Train: [82] Total time: 0:16:58 (0.1629 s / it) +Averaged stats: lr: 0.000010 grad: 0.1963 (0.2381) loss: 0.6448 (0.6577) +Eval (hcp-train-subset): [82] [ 0/62] eta: 0:04:28 loss: 0.8973 (0.8973) time: 4.3353 data: 4.2691 max mem: 9377 +Eval (hcp-train-subset): [82] [61/62] eta: 0:00:00 loss: 0.9093 (0.9095) time: 0.1423 data: 0.1173 max mem: 9377 +Eval (hcp-train-subset): [82] Total time: 0:00:14 (0.2401 s / it) +Averaged stats (hcp-train-subset): loss: 0.9093 (0.9095) +Eval (hcp-val): [82] [ 0/62] eta: 0:05:50 loss: 0.9057 (0.9057) time: 5.6521 data: 5.6087 max mem: 9377 +Eval (hcp-val): [82] [61/62] eta: 0:00:00 loss: 0.9085 (0.9092) time: 0.1427 data: 0.1160 max mem: 9377 +Eval (hcp-val): [82] Total time: 0:00:14 (0.2388 s / it) +Averaged stats (hcp-val): loss: 0.9085 (0.9092) +Eval (nsd-val): [82] [ 0/62] eta: 0:05:41 loss: 0.9135 (0.9135) time: 5.5112 data: 5.4812 max mem: 9377 +Eval (nsd-val): [82] [61/62] eta: 0:00:00 loss: 0.9126 (0.9147) time: 0.1136 data: 0.0885 max mem: 9377 +Eval (nsd-val): [82] Total time: 0:00:14 (0.2386 s / it) +Averaged stats (nsd-val): loss: 0.9126 (0.9147) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [83] [ 0/6250] eta: 9:42:32 lr: 0.000010 grad: 0.2137 (0.2137) loss: 0.7479 (0.7479) time: 5.5924 data: 5.3402 max mem: 9377 +Train: [83] [ 100/6250] eta: 0:22:43 lr: 0.000010 grad: 0.2258 (0.2608) loss: 0.6893 (0.6909) time: 0.1922 data: 0.0929 max mem: 9377 +Train: [83] [ 200/6250] eta: 0:19:34 lr: 0.000010 grad: 0.2151 (0.2450) loss: 0.6410 (0.6797) time: 0.1700 data: 0.0624 max mem: 9377 +Train: [83] [ 300/6250] eta: 0:18:00 lr: 0.000010 grad: 0.2056 (0.2420) loss: 0.6576 (0.6721) time: 0.1437 data: 0.0388 max mem: 9377 +Train: [83] [ 400/6250] eta: 0:17:03 lr: 0.000010 grad: 0.2039 (0.2394) loss: 0.6711 (0.6676) time: 0.1585 data: 0.0660 max mem: 9377 +Train: [83] [ 500/6250] eta: 0:16:22 lr: 0.000010 grad: 0.2004 (0.2397) loss: 0.6539 (0.6665) time: 0.1480 data: 0.0482 max mem: 9377 +Train: [83] [ 600/6250] eta: 0:15:50 lr: 0.000010 grad: 0.2017 (0.2383) loss: 0.6473 (0.6657) time: 0.1634 data: 0.0620 max mem: 9377 +Train: [83] [ 700/6250] eta: 0:15:26 lr: 0.000009 grad: 0.1977 (0.2384) loss: 0.6613 (0.6650) time: 0.1385 data: 0.0312 max mem: 9377 +Train: [83] [ 800/6250] eta: 0:15:03 lr: 0.000009 grad: 0.1941 (0.2363) loss: 0.6618 (0.6645) time: 0.1570 data: 0.0654 max mem: 9377 +Train: [83] [ 900/6250] eta: 0:14:49 lr: 0.000009 grad: 0.2033 (0.2377) loss: 0.6632 (0.6642) time: 0.2221 data: 0.1296 max mem: 9377 +Train: [83] [1000/6250] eta: 0:14:27 lr: 0.000009 grad: 0.2029 (0.2384) loss: 0.6548 (0.6635) time: 0.1579 data: 0.0765 max mem: 9377 +Train: [83] [1100/6250] eta: 0:14:13 lr: 0.000009 grad: 0.1963 (0.2371) loss: 0.6568 (0.6631) time: 0.1729 data: 0.0816 max mem: 9377 +Train: [83] [1200/6250] eta: 0:13:50 lr: 0.000009 grad: 0.1992 (0.2378) loss: 0.6613 (0.6628) time: 0.1485 data: 0.0696 max mem: 9377 +Train: [83] [1300/6250] eta: 0:13:36 lr: 0.000009 grad: 0.2007 (0.2388) loss: 0.6715 (0.6633) time: 0.1549 data: 0.0644 max mem: 9377 +Train: [83] [1400/6250] eta: 0:13:18 lr: 0.000009 grad: 0.1921 (0.2375) loss: 0.6773 (0.6633) time: 0.1623 data: 0.0685 max mem: 9377 +Train: [83] [1500/6250] eta: 0:12:58 lr: 0.000009 grad: 0.1976 (0.2373) loss: 0.6631 (0.6631) time: 0.1456 data: 0.0550 max mem: 9377 +Train: [83] [1600/6250] eta: 0:12:39 lr: 0.000009 grad: 0.1986 (0.2373) loss: 0.6639 (0.6631) time: 0.1380 data: 0.0422 max mem: 9377 +Train: [83] [1700/6250] eta: 0:12:21 lr: 0.000009 grad: 0.2061 (0.2370) loss: 0.6529 (0.6631) time: 0.1499 data: 0.0586 max mem: 9377 +Train: [83] [1800/6250] eta: 0:12:03 lr: 0.000009 grad: 0.2009 (0.2370) loss: 0.6511 (0.6630) time: 0.1707 data: 0.0886 max mem: 9377 +Train: [83] [1900/6250] eta: 0:11:45 lr: 0.000009 grad: 0.2026 (0.2369) loss: 0.6791 (0.6630) time: 0.1687 data: 0.0774 max mem: 9377 +Train: [83] [2000/6250] eta: 0:11:30 lr: 0.000009 grad: 0.1945 (0.2371) loss: 0.6682 (0.6631) time: 0.1953 data: 0.1095 max mem: 9377 +Train: [83] [2100/6250] eta: 0:11:12 lr: 0.000009 grad: 0.2005 (0.2369) loss: 0.6508 (0.6630) time: 0.1386 data: 0.0569 max mem: 9377 +Train: [83] [2200/6250] eta: 0:10:57 lr: 0.000009 grad: 0.1989 (0.2365) loss: 0.6526 (0.6629) time: 0.1617 data: 0.0841 max mem: 9377 +Train: [83] [2300/6250] eta: 0:10:37 lr: 0.000009 grad: 0.1942 (0.2366) loss: 0.6732 (0.6630) time: 0.1591 data: 0.0696 max mem: 9377 +Train: [83] [2400/6250] eta: 0:10:19 lr: 0.000009 grad: 0.2094 (0.2372) loss: 0.6613 (0.6630) time: 0.1364 data: 0.0457 max mem: 9377 +Train: [83] [2500/6250] eta: 0:10:02 lr: 0.000009 grad: 0.1948 (0.2364) loss: 0.6564 (0.6628) time: 0.1383 data: 0.0407 max mem: 9377 +Train: [83] [2600/6250] eta: 0:09:47 lr: 0.000009 grad: 0.2031 (0.2375) loss: 0.6500 (0.6626) time: 0.1663 data: 0.0783 max mem: 9377 +Train: [83] [2700/6250] eta: 0:09:33 lr: 0.000009 grad: 0.2010 (0.2374) loss: 0.6613 (0.6625) time: 0.1832 data: 0.0939 max mem: 9377 +Train: [83] [2800/6250] eta: 0:09:20 lr: 0.000009 grad: 0.1971 (0.2373) loss: 0.6580 (0.6623) time: 0.2178 data: 0.1290 max mem: 9377 +Train: [83] [2900/6250] eta: 0:09:04 lr: 0.000009 grad: 0.1998 (0.2367) loss: 0.6600 (0.6622) time: 0.1639 data: 0.0810 max mem: 9377 +Train: [83] [3000/6250] eta: 0:08:49 lr: 0.000009 grad: 0.1952 (0.2363) loss: 0.6653 (0.6621) time: 0.1791 data: 0.0823 max mem: 9377 +Train: [83] [3100/6250] eta: 0:08:35 lr: 0.000009 grad: 0.1921 (0.2360) loss: 0.6746 (0.6622) time: 0.1908 data: 0.1039 max mem: 9377 +Train: [83] [3200/6250] eta: 0:08:19 lr: 0.000009 grad: 0.1944 (0.2379) loss: 0.6602 (0.6622) time: 0.1774 data: 0.0956 max mem: 9377 +Train: [83] [3300/6250] eta: 0:08:04 lr: 0.000009 grad: 0.1952 (0.2374) loss: 0.6756 (0.6625) time: 0.1597 data: 0.0675 max mem: 9377 +Train: [83] [3400/6250] eta: 0:07:48 lr: 0.000009 grad: 0.1940 (0.2378) loss: 0.6625 (0.6627) time: 0.1749 data: 0.0854 max mem: 9377 +Train: [83] [3500/6250] eta: 0:07:32 lr: 0.000009 grad: 0.2007 (0.2380) loss: 0.6738 (0.6627) time: 0.1565 data: 0.0597 max mem: 9377 +Train: [83] [3600/6250] eta: 0:07:15 lr: 0.000009 grad: 0.1950 (0.2384) loss: 0.6788 (0.6626) time: 0.1404 data: 0.0402 max mem: 9377 +Train: [83] [3700/6250] eta: 0:06:58 lr: 0.000009 grad: 0.2085 (0.2392) loss: 0.6505 (0.6625) time: 0.1620 data: 0.0640 max mem: 9377 +Train: [83] [3800/6250] eta: 0:06:41 lr: 0.000009 grad: 0.2114 (0.2393) loss: 0.6625 (0.6624) time: 0.1648 data: 0.0864 max mem: 9377 +Train: [83] [3900/6250] eta: 0:06:25 lr: 0.000009 grad: 0.1925 (0.2392) loss: 0.6714 (0.6625) time: 0.1753 data: 0.0830 max mem: 9377 +Train: [83] [4000/6250] eta: 0:06:08 lr: 0.000009 grad: 0.1931 (0.2391) loss: 0.6677 (0.6626) time: 0.1538 data: 0.0582 max mem: 9377 +Train: [83] [4100/6250] eta: 0:05:51 lr: 0.000009 grad: 0.1913 (0.2390) loss: 0.6760 (0.6628) time: 0.1485 data: 0.0572 max mem: 9377 +Train: [83] [4200/6250] eta: 0:05:34 lr: 0.000009 grad: 0.1949 (0.2394) loss: 0.6575 (0.6630) time: 0.1575 data: 0.0748 max mem: 9377 +Train: [83] [4300/6250] eta: 0:05:18 lr: 0.000009 grad: 0.2084 (0.2394) loss: 0.6728 (0.6631) time: 0.1595 data: 0.0629 max mem: 9377 +Train: [83] [4400/6250] eta: 0:05:01 lr: 0.000009 grad: 0.1935 (0.2387) loss: 0.6694 (0.6633) time: 0.1756 data: 0.0939 max mem: 9377 +Train: [83] [4500/6250] eta: 0:04:45 lr: 0.000009 grad: 0.1974 (0.2392) loss: 0.6739 (0.6634) time: 0.1504 data: 0.0606 max mem: 9377 +Train: [83] [4600/6250] eta: 0:04:28 lr: 0.000009 grad: 0.1958 (0.2390) loss: 0.6693 (0.6636) time: 0.1455 data: 0.0613 max mem: 9377 +Train: [83] [4700/6250] eta: 0:04:12 lr: 0.000009 grad: 0.2034 (0.2388) loss: 0.6561 (0.6638) time: 0.1924 data: 0.1055 max mem: 9377 +Train: [83] [4800/6250] eta: 0:03:56 lr: 0.000009 grad: 0.2061 (0.2385) loss: 0.6608 (0.6638) time: 0.1765 data: 0.0909 max mem: 9377 +Train: [83] [4900/6250] eta: 0:03:40 lr: 0.000009 grad: 0.1934 (0.2380) loss: 0.6653 (0.6639) time: 0.1693 data: 0.0727 max mem: 9377 +Train: [83] [5000/6250] eta: 0:03:23 lr: 0.000009 grad: 0.1957 (0.2379) loss: 0.6508 (0.6638) time: 0.1640 data: 0.0697 max mem: 9377 +Train: [83] [5100/6250] eta: 0:03:07 lr: 0.000009 grad: 0.2107 (0.2377) loss: 0.6608 (0.6638) time: 0.1657 data: 0.0830 max mem: 9377 +Train: [83] [5200/6250] eta: 0:02:51 lr: 0.000009 grad: 0.1971 (0.2378) loss: 0.6582 (0.6637) time: 0.1676 data: 0.0789 max mem: 9377 +Train: [83] [5300/6250] eta: 0:02:34 lr: 0.000009 grad: 0.1983 (0.2381) loss: 0.6623 (0.6637) time: 0.1603 data: 0.0704 max mem: 9377 +Train: [83] [5400/6250] eta: 0:02:18 lr: 0.000009 grad: 0.2012 (0.2378) loss: 0.6742 (0.6637) time: 0.1569 data: 0.0695 max mem: 9377 +Train: [83] [5500/6250] eta: 0:02:01 lr: 0.000009 grad: 0.1962 (0.2378) loss: 0.6592 (0.6636) time: 0.1523 data: 0.0637 max mem: 9377 +Train: [83] [5600/6250] eta: 0:01:45 lr: 0.000009 grad: 0.2027 (0.2380) loss: 0.6460 (0.6635) time: 0.1555 data: 0.0622 max mem: 9377 +Train: [83] [5700/6250] eta: 0:01:29 lr: 0.000009 grad: 0.1951 (0.2384) loss: 0.6830 (0.6635) time: 0.1504 data: 0.0550 max mem: 9377 +Train: [83] [5800/6250] eta: 0:01:12 lr: 0.000009 grad: 0.2143 (0.2386) loss: 0.6639 (0.6635) time: 0.1513 data: 0.0579 max mem: 9377 +Train: [83] [5900/6250] eta: 0:00:56 lr: 0.000009 grad: 0.1939 (0.2381) loss: 0.6735 (0.6634) time: 0.1586 data: 0.0655 max mem: 9377 +Train: [83] [6000/6250] eta: 0:00:40 lr: 0.000009 grad: 0.1987 (0.2381) loss: 0.6581 (0.6634) time: 0.1506 data: 0.0605 max mem: 9377 +Train: [83] [6100/6250] eta: 0:00:24 lr: 0.000009 grad: 0.1937 (0.2379) loss: 0.6667 (0.6634) time: 0.1396 data: 0.0530 max mem: 9377 +Train: [83] [6200/6250] eta: 0:00:08 lr: 0.000009 grad: 0.1969 (0.2376) loss: 0.6601 (0.6633) time: 0.1377 data: 0.0492 max mem: 9377 +Train: [83] [6249/6250] eta: 0:00:00 lr: 0.000009 grad: 0.1955 (0.2376) loss: 0.6632 (0.6633) time: 0.2198 data: 0.1379 max mem: 9377 +Train: [83] Total time: 0:16:55 (0.1624 s / it) +Averaged stats: lr: 0.000009 grad: 0.1955 (0.2376) loss: 0.6632 (0.6633) +Eval (hcp-train-subset): [83] [ 0/62] eta: 0:06:26 loss: 0.8962 (0.8962) time: 6.2402 data: 6.1973 max mem: 9377 +Eval (hcp-train-subset): [83] [61/62] eta: 0:00:00 loss: 0.9111 (0.9104) time: 0.1490 data: 0.1222 max mem: 9377 +Eval (hcp-train-subset): [83] Total time: 0:00:14 (0.2378 s / it) +Averaged stats (hcp-train-subset): loss: 0.9111 (0.9104) +Eval (hcp-val): [83] [ 0/62] eta: 0:04:04 loss: 0.9096 (0.9096) time: 3.9409 data: 3.8375 max mem: 9377 +Eval (hcp-val): [83] [61/62] eta: 0:00:00 loss: 0.9092 (0.9095) time: 0.1593 data: 0.1341 max mem: 9377 +Eval (hcp-val): [83] Total time: 0:00:15 (0.2523 s / it) +Averaged stats (hcp-val): loss: 0.9092 (0.9095) +Eval (nsd-val): [83] [ 0/62] eta: 0:06:08 loss: 0.9158 (0.9158) time: 5.9435 data: 5.9128 max mem: 9377 +Eval (nsd-val): [83] [61/62] eta: 0:00:00 loss: 0.9165 (0.9189) time: 0.1599 data: 0.1336 max mem: 9377 +Eval (nsd-val): [83] Total time: 0:00:16 (0.2652 s / it) +Averaged stats (nsd-val): loss: 0.9165 (0.9189) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [84] [ 0/6250] eta: 13:19:19 lr: 0.000009 grad: 0.1923 (0.1923) loss: 0.7994 (0.7994) time: 7.6736 data: 7.5147 max mem: 9377 +Train: [84] [ 100/6250] eta: 0:26:16 lr: 0.000009 grad: 0.2368 (0.2613) loss: 0.6400 (0.6584) time: 0.1939 data: 0.0830 max mem: 9377 +Train: [84] [ 200/6250] eta: 0:22:37 lr: 0.000009 grad: 0.2185 (0.2580) loss: 0.6595 (0.6530) time: 0.2070 data: 0.1049 max mem: 9377 +Train: [84] [ 300/6250] eta: 0:20:48 lr: 0.000008 grad: 0.2010 (0.2563) loss: 0.6597 (0.6560) time: 0.1662 data: 0.0618 max mem: 9377 +Train: [84] [ 400/6250] eta: 0:19:23 lr: 0.000008 grad: 0.1949 (0.2547) loss: 0.6696 (0.6570) time: 0.1476 data: 0.0547 max mem: 9377 +Train: [84] [ 500/6250] eta: 0:18:25 lr: 0.000008 grad: 0.2040 (0.2497) loss: 0.6565 (0.6576) time: 0.1672 data: 0.0708 max mem: 9377 +Train: [84] [ 600/6250] eta: 0:17:37 lr: 0.000008 grad: 0.2033 (0.2478) loss: 0.6526 (0.6583) time: 0.1413 data: 0.0430 max mem: 9377 +Train: [84] [ 700/6250] eta: 0:16:58 lr: 0.000008 grad: 0.2015 (0.2474) loss: 0.6615 (0.6584) time: 0.1743 data: 0.0863 max mem: 9377 +Train: [84] [ 800/6250] eta: 0:16:23 lr: 0.000008 grad: 0.1933 (0.2450) loss: 0.6521 (0.6588) time: 0.1513 data: 0.0543 max mem: 9377 +Train: [84] [ 900/6250] eta: 0:15:53 lr: 0.000008 grad: 0.1959 (0.2425) loss: 0.6676 (0.6593) time: 0.1762 data: 0.0831 max mem: 9377 +Train: [84] [1000/6250] eta: 0:15:30 lr: 0.000008 grad: 0.1964 (0.2434) loss: 0.6696 (0.6603) time: 0.1642 data: 0.0816 max mem: 9377 +Train: [84] [1100/6250] eta: 0:15:07 lr: 0.000008 grad: 0.1868 (0.2414) loss: 0.6852 (0.6614) time: 0.1637 data: 0.0750 max mem: 9377 +Train: [84] [1200/6250] eta: 0:14:44 lr: 0.000008 grad: 0.1872 (0.2413) loss: 0.6863 (0.6624) time: 0.1497 data: 0.0637 max mem: 9377 +Train: [84] [1300/6250] eta: 0:14:21 lr: 0.000008 grad: 0.1945 (0.2414) loss: 0.6656 (0.6628) time: 0.1739 data: 0.0767 max mem: 9377 +Train: [84] [1400/6250] eta: 0:13:57 lr: 0.000008 grad: 0.1995 (0.2422) loss: 0.6670 (0.6629) time: 0.1682 data: 0.0784 max mem: 9377 +Train: [84] [1500/6250] eta: 0:13:36 lr: 0.000008 grad: 0.1984 (0.2456) loss: 0.6699 (0.6631) time: 0.1410 data: 0.0389 max mem: 9377 +Train: [84] [1600/6250] eta: 0:13:14 lr: 0.000008 grad: 0.1963 (0.2446) loss: 0.6565 (0.6632) time: 0.1519 data: 0.0588 max mem: 9377 +Train: [84] [1700/6250] eta: 0:12:52 lr: 0.000008 grad: 0.1953 (0.2437) loss: 0.6664 (0.6636) time: 0.1787 data: 0.0856 max mem: 9377 +Train: [84] [1800/6250] eta: 0:12:29 lr: 0.000008 grad: 0.1947 (0.2425) loss: 0.6612 (0.6635) time: 0.1602 data: 0.0760 max mem: 9377 +Train: [84] [1900/6250] eta: 0:12:07 lr: 0.000008 grad: 0.1979 (0.2415) loss: 0.6677 (0.6635) time: 0.1421 data: 0.0516 max mem: 9377 +Train: [84] [2000/6250] eta: 0:11:46 lr: 0.000008 grad: 0.1911 (0.2408) loss: 0.6686 (0.6633) time: 0.1494 data: 0.0529 max mem: 9377 +Train: [84] [2100/6250] eta: 0:11:27 lr: 0.000008 grad: 0.2051 (0.2399) loss: 0.6623 (0.6632) time: 0.1569 data: 0.0684 max mem: 9377 +Train: [84] [2200/6250] eta: 0:11:08 lr: 0.000008 grad: 0.1994 (0.2387) loss: 0.6556 (0.6633) time: 0.1584 data: 0.0711 max mem: 9377 +Train: [84] [2300/6250] eta: 0:10:50 lr: 0.000008 grad: 0.1969 (0.2384) loss: 0.6586 (0.6631) time: 0.1583 data: 0.0708 max mem: 9377 +Train: [84] [2400/6250] eta: 0:10:31 lr: 0.000008 grad: 0.1981 (0.2379) loss: 0.6569 (0.6630) time: 0.1486 data: 0.0537 max mem: 9377 +Train: [84] [2500/6250] eta: 0:10:13 lr: 0.000008 grad: 0.2032 (0.2378) loss: 0.6630 (0.6627) time: 0.1411 data: 0.0582 max mem: 9377 +Train: [84] [2600/6250] eta: 0:09:55 lr: 0.000008 grad: 0.1967 (0.2377) loss: 0.6586 (0.6625) time: 0.1436 data: 0.0540 max mem: 9377 +Train: [84] [2700/6250] eta: 0:09:40 lr: 0.000008 grad: 0.2028 (0.2381) loss: 0.6656 (0.6625) time: 0.2403 data: 0.1674 max mem: 9377 +Train: [84] [2800/6250] eta: 0:09:23 lr: 0.000008 grad: 0.1969 (0.2382) loss: 0.6540 (0.6626) time: 0.1470 data: 0.0653 max mem: 9377 +Train: [84] [2900/6250] eta: 0:09:06 lr: 0.000008 grad: 0.2000 (0.2382) loss: 0.6726 (0.6628) time: 0.1550 data: 0.0691 max mem: 9377 +Train: [84] [3000/6250] eta: 0:08:49 lr: 0.000008 grad: 0.1917 (0.2388) loss: 0.6707 (0.6628) time: 0.1298 data: 0.0563 max mem: 9377 +Train: [84] [3100/6250] eta: 0:08:33 lr: 0.000008 grad: 0.2068 (0.2396) loss: 0.6620 (0.6628) time: 0.1759 data: 0.0915 max mem: 9377 +Train: [84] [3200/6250] eta: 0:08:17 lr: 0.000008 grad: 0.2066 (0.2412) loss: 0.6660 (0.6629) time: 0.1862 data: 0.0937 max mem: 9377 +Train: [84] [3300/6250] eta: 0:08:01 lr: 0.000008 grad: 0.1944 (0.2409) loss: 0.6747 (0.6629) time: 0.1560 data: 0.0593 max mem: 9377 +Train: [84] [3400/6250] eta: 0:07:45 lr: 0.000008 grad: 0.1962 (0.2411) loss: 0.6707 (0.6629) time: 0.1459 data: 0.0559 max mem: 9377 +Train: [84] [3500/6250] eta: 0:07:28 lr: 0.000008 grad: 0.1989 (0.2418) loss: 0.6561 (0.6628) time: 0.1516 data: 0.0464 max mem: 9377 +Train: [84] [3600/6250] eta: 0:07:12 lr: 0.000008 grad: 0.1985 (0.2415) loss: 0.6562 (0.6626) time: 0.1348 data: 0.0371 max mem: 9377 +Train: [84] [3700/6250] eta: 0:06:55 lr: 0.000008 grad: 0.2010 (0.2410) loss: 0.6557 (0.6624) time: 0.1681 data: 0.0747 max mem: 9377 +Train: [84] [3800/6250] eta: 0:06:38 lr: 0.000008 grad: 0.2001 (0.2410) loss: 0.6526 (0.6622) time: 0.1426 data: 0.0467 max mem: 9377 +Train: [84] [3900/6250] eta: 0:06:21 lr: 0.000008 grad: 0.2004 (0.2410) loss: 0.6659 (0.6622) time: 0.1603 data: 0.0712 max mem: 9377 +Train: [84] [4000/6250] eta: 0:06:05 lr: 0.000008 grad: 0.1903 (0.2409) loss: 0.6526 (0.6621) time: 0.1532 data: 0.0642 max mem: 9377 +Train: [84] [4100/6250] eta: 0:05:48 lr: 0.000008 grad: 0.2013 (0.2401) loss: 0.6642 (0.6620) time: 0.1558 data: 0.0653 max mem: 9377 +Train: [84] [4200/6250] eta: 0:05:32 lr: 0.000008 grad: 0.1941 (0.2398) loss: 0.6529 (0.6619) time: 0.1628 data: 0.0784 max mem: 9377 +Train: [84] [4300/6250] eta: 0:05:15 lr: 0.000008 grad: 0.2040 (0.2402) loss: 0.6645 (0.6618) time: 0.1814 data: 0.0946 max mem: 9377 +Train: [84] [4400/6250] eta: 0:04:59 lr: 0.000008 grad: 0.1963 (0.2404) loss: 0.6556 (0.6617) time: 0.1604 data: 0.0719 max mem: 9377 +Train: [84] [4500/6250] eta: 0:04:42 lr: 0.000008 grad: 0.2046 (0.2402) loss: 0.6536 (0.6616) time: 0.1508 data: 0.0669 max mem: 9377 +Train: [84] [4600/6250] eta: 0:04:26 lr: 0.000008 grad: 0.1980 (0.2406) loss: 0.6560 (0.6614) time: 0.1426 data: 0.0481 max mem: 9377 +Train: [84] [4700/6250] eta: 0:04:10 lr: 0.000008 grad: 0.1951 (0.2407) loss: 0.6578 (0.6613) time: 0.1466 data: 0.0666 max mem: 9377 +Train: [84] [4800/6250] eta: 0:03:54 lr: 0.000008 grad: 0.1984 (0.2405) loss: 0.6417 (0.6612) time: 0.1449 data: 0.0515 max mem: 9377 +Train: [84] [4900/6250] eta: 0:03:38 lr: 0.000008 grad: 0.2022 (0.2407) loss: 0.6581 (0.6610) time: 0.1240 data: 0.0338 max mem: 9377 +Train: [84] [5000/6250] eta: 0:03:22 lr: 0.000008 grad: 0.2101 (0.2413) loss: 0.6591 (0.6609) time: 0.1351 data: 0.0492 max mem: 9377 +Train: [84] [5100/6250] eta: 0:03:06 lr: 0.000008 grad: 0.2055 (0.2415) loss: 0.6529 (0.6608) time: 0.1864 data: 0.1004 max mem: 9377 +Train: [84] [5200/6250] eta: 0:02:50 lr: 0.000008 grad: 0.2026 (0.2412) loss: 0.6663 (0.6608) time: 0.1584 data: 0.0729 max mem: 9377 +Train: [84] [5300/6250] eta: 0:02:33 lr: 0.000008 grad: 0.1988 (0.2411) loss: 0.6584 (0.6608) time: 0.1550 data: 0.0631 max mem: 9377 +Train: [84] [5400/6250] eta: 0:02:17 lr: 0.000008 grad: 0.2008 (0.2410) loss: 0.6541 (0.6607) time: 0.1907 data: 0.1054 max mem: 9377 +Train: [84] [5500/6250] eta: 0:02:01 lr: 0.000008 grad: 0.1985 (0.2408) loss: 0.6503 (0.6606) time: 0.1577 data: 0.0662 max mem: 9377 +Train: [84] [5600/6250] eta: 0:01:44 lr: 0.000008 grad: 0.1936 (0.2410) loss: 0.6550 (0.6606) time: 0.1502 data: 0.0509 max mem: 9377 +Train: [84] [5700/6250] eta: 0:01:28 lr: 0.000008 grad: 0.1943 (0.2410) loss: 0.6663 (0.6606) time: 0.1550 data: 0.0573 max mem: 9377 +Train: [84] [5800/6250] eta: 0:01:12 lr: 0.000008 grad: 0.2039 (0.2407) loss: 0.6541 (0.6606) time: 0.1557 data: 0.0680 max mem: 9377 +Train: [84] [5900/6250] eta: 0:00:56 lr: 0.000008 grad: 0.2093 (0.2407) loss: 0.6587 (0.6606) time: 0.1360 data: 0.0527 max mem: 9377 +Train: [84] [6000/6250] eta: 0:00:40 lr: 0.000008 grad: 0.1971 (0.2406) loss: 0.6568 (0.6606) time: 0.1655 data: 0.0813 max mem: 9377 +Train: [84] [6100/6250] eta: 0:00:24 lr: 0.000008 grad: 0.2009 (0.2406) loss: 0.6629 (0.6606) time: 0.1667 data: 0.0779 max mem: 9377 +Train: [84] [6200/6250] eta: 0:00:08 lr: 0.000008 grad: 0.2124 (0.2408) loss: 0.6662 (0.6606) time: 0.1465 data: 0.0526 max mem: 9377 +Train: [84] [6249/6250] eta: 0:00:00 lr: 0.000008 grad: 0.2043 (0.2407) loss: 0.6716 (0.6606) time: 0.1405 data: 0.0523 max mem: 9377 +Train: [84] Total time: 0:16:49 (0.1615 s / it) +Averaged stats: lr: 0.000008 grad: 0.2043 (0.2407) loss: 0.6716 (0.6606) +Eval (hcp-train-subset): [84] [ 0/62] eta: 0:05:01 loss: 0.8978 (0.8978) time: 4.8627 data: 4.7917 max mem: 9377 +Eval (hcp-train-subset): [84] [61/62] eta: 0:00:00 loss: 0.9103 (0.9103) time: 0.1432 data: 0.1163 max mem: 9377 +Eval (hcp-train-subset): [84] Total time: 0:00:15 (0.2441 s / it) +Averaged stats (hcp-train-subset): loss: 0.9103 (0.9103) +Making plots (hcp-train-subset): example=49 +Eval (hcp-val): [84] [ 0/62] eta: 0:05:39 loss: 0.9152 (0.9152) time: 5.4787 data: 5.4463 max mem: 9377 +Eval (hcp-val): [84] [61/62] eta: 0:00:00 loss: 0.9112 (0.9115) time: 0.1504 data: 0.1230 max mem: 9377 +Eval (hcp-val): [84] Total time: 0:00:15 (0.2430 s / it) +Averaged stats (hcp-val): loss: 0.9112 (0.9115) +Making plots (hcp-val): example=57 +Eval (nsd-val): [84] [ 0/62] eta: 0:05:46 loss: 0.9173 (0.9173) time: 5.5915 data: 5.5501 max mem: 9377 +Eval (nsd-val): [84] [61/62] eta: 0:00:00 loss: 0.9155 (0.9171) time: 0.1179 data: 0.0925 max mem: 9377 +Eval (nsd-val): [84] Total time: 0:00:14 (0.2413 s / it) +Averaged stats (nsd-val): loss: 0.9155 (0.9171) +Making plots (nsd-val): example=40 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-00084.pth +Train: [85] [ 0/6250] eta: 10:17:57 lr: 0.000008 grad: 0.3067 (0.3067) loss: 0.7805 (0.7805) time: 5.9323 data: 5.7000 max mem: 9377 +Train: [85] [ 100/6250] eta: 0:22:16 lr: 0.000008 grad: 0.2409 (0.2743) loss: 0.6563 (0.6935) time: 0.1604 data: 0.0449 max mem: 9377 +Train: [85] [ 200/6250] eta: 0:19:15 lr: 0.000008 grad: 0.2187 (0.2845) loss: 0.6597 (0.6748) time: 0.1579 data: 0.0571 max mem: 9377 +Train: [85] [ 300/6250] eta: 0:17:55 lr: 0.000007 grad: 0.2088 (0.2740) loss: 0.6532 (0.6678) time: 0.1549 data: 0.0534 max mem: 9377 +Train: [85] [ 400/6250] eta: 0:17:05 lr: 0.000007 grad: 0.2200 (0.2783) loss: 0.6466 (0.6650) time: 0.1667 data: 0.0767 max mem: 9377 +Train: [85] [ 500/6250] eta: 0:16:23 lr: 0.000007 grad: 0.2042 (0.2739) loss: 0.6565 (0.6627) time: 0.1533 data: 0.0613 max mem: 9377 +Train: [85] [ 600/6250] eta: 0:15:48 lr: 0.000007 grad: 0.2109 (0.2767) loss: 0.6476 (0.6612) time: 0.1607 data: 0.0726 max mem: 9377 +Train: [85] [ 700/6250] eta: 0:15:19 lr: 0.000007 grad: 0.2242 (0.2720) loss: 0.6463 (0.6610) time: 0.1492 data: 0.0496 max mem: 9377 +Train: [85] [ 800/6250] eta: 0:14:56 lr: 0.000007 grad: 0.2193 (0.2704) loss: 0.6442 (0.6608) time: 0.1630 data: 0.0643 max mem: 9377 +Train: [85] [ 900/6250] eta: 0:14:44 lr: 0.000007 grad: 0.2016 (0.2712) loss: 0.6709 (0.6601) time: 0.1823 data: 0.0858 max mem: 9377 +Train: [85] [1000/6250] eta: 0:14:25 lr: 0.000007 grad: 0.2099 (0.2673) loss: 0.6311 (0.6591) time: 0.1618 data: 0.0707 max mem: 9377 +Train: [85] [1100/6250] eta: 0:14:05 lr: 0.000007 grad: 0.2045 (0.2664) loss: 0.6514 (0.6580) time: 0.1493 data: 0.0591 max mem: 9377 +Train: [85] [1200/6250] eta: 0:13:43 lr: 0.000007 grad: 0.2049 (0.2632) loss: 0.6490 (0.6575) time: 0.1486 data: 0.0669 max mem: 9377 +Train: [85] [1300/6250] eta: 0:13:33 lr: 0.000007 grad: 0.2030 (0.2610) loss: 0.6615 (0.6571) time: 0.1980 data: 0.0993 max mem: 9377 +Train: [85] [1400/6250] eta: 0:13:24 lr: 0.000007 grad: 0.2026 (0.2602) loss: 0.6466 (0.6563) time: 0.1965 data: 0.1017 max mem: 9377 +Train: [85] [1500/6250] eta: 0:13:13 lr: 0.000007 grad: 0.2063 (0.2600) loss: 0.6395 (0.6556) time: 0.1633 data: 0.0684 max mem: 9377 +Train: [85] [1600/6250] eta: 0:12:55 lr: 0.000007 grad: 0.2096 (0.2581) loss: 0.6555 (0.6551) time: 0.1641 data: 0.0698 max mem: 9377 +Train: [85] [1700/6250] eta: 0:12:37 lr: 0.000007 grad: 0.1981 (0.2581) loss: 0.6540 (0.6549) time: 0.1570 data: 0.0737 max mem: 9377 +Train: [85] [1800/6250] eta: 0:12:20 lr: 0.000007 grad: 0.2167 (0.2565) loss: 0.6386 (0.6544) time: 0.1650 data: 0.0746 max mem: 9377 +Train: [85] [1900/6250] eta: 0:12:02 lr: 0.000007 grad: 0.2067 (0.2550) loss: 0.6378 (0.6543) time: 0.1726 data: 0.0743 max mem: 9377 +Train: [85] [2000/6250] eta: 0:11:43 lr: 0.000007 grad: 0.1994 (0.2551) loss: 0.6500 (0.6543) time: 0.1467 data: 0.0600 max mem: 9377 +Train: [85] [2100/6250] eta: 0:11:26 lr: 0.000007 grad: 0.1956 (0.2545) loss: 0.6625 (0.6542) time: 0.1820 data: 0.0954 max mem: 9377 +Train: [85] [2200/6250] eta: 0:11:10 lr: 0.000007 grad: 0.2001 (0.2535) loss: 0.6489 (0.6542) time: 0.1781 data: 0.0913 max mem: 9377 +Train: [85] [2300/6250] eta: 0:10:53 lr: 0.000007 grad: 0.2037 (0.2530) loss: 0.6492 (0.6540) time: 0.1633 data: 0.0826 max mem: 9377 +Train: [85] [2400/6250] eta: 0:10:34 lr: 0.000007 grad: 0.2067 (0.2542) loss: 0.6398 (0.6541) time: 0.1682 data: 0.0828 max mem: 9377 +Train: [85] [2500/6250] eta: 0:10:16 lr: 0.000007 grad: 0.2014 (0.2545) loss: 0.6602 (0.6542) time: 0.1342 data: 0.0526 max mem: 9377 +Train: [85] [2600/6250] eta: 0:09:59 lr: 0.000007 grad: 0.2087 (0.2551) loss: 0.6593 (0.6541) time: 0.1577 data: 0.0687 max mem: 9377 +Train: [85] [2700/6250] eta: 0:09:44 lr: 0.000007 grad: 0.2108 (0.2553) loss: 0.6503 (0.6541) time: 0.1741 data: 0.0878 max mem: 9377 +Train: [85] [2800/6250] eta: 0:09:27 lr: 0.000007 grad: 0.2055 (0.2548) loss: 0.6608 (0.6542) time: 0.1697 data: 0.0851 max mem: 9377 +Train: [85] [2900/6250] eta: 0:09:10 lr: 0.000007 grad: 0.2233 (0.2557) loss: 0.6502 (0.6543) time: 0.1650 data: 0.0800 max mem: 9377 +Train: [85] [3000/6250] eta: 0:08:53 lr: 0.000007 grad: 0.2040 (0.2549) loss: 0.6460 (0.6543) time: 0.1550 data: 0.0719 max mem: 9377 +Train: [85] [3100/6250] eta: 0:08:38 lr: 0.000007 grad: 0.2000 (0.2550) loss: 0.6604 (0.6544) time: 0.1919 data: 0.1043 max mem: 9377 +Train: [85] [3200/6250] eta: 0:08:22 lr: 0.000007 grad: 0.2106 (0.2550) loss: 0.6649 (0.6545) time: 0.1455 data: 0.0571 max mem: 9377 +Train: [85] [3300/6250] eta: 0:08:06 lr: 0.000007 grad: 0.2061 (0.2552) loss: 0.6537 (0.6545) time: 0.1782 data: 0.0959 max mem: 9377 +Train: [85] [3400/6250] eta: 0:07:49 lr: 0.000007 grad: 0.1951 (0.2548) loss: 0.6523 (0.6546) time: 0.1652 data: 0.0737 max mem: 9377 +Train: [85] [3500/6250] eta: 0:07:32 lr: 0.000007 grad: 0.2012 (0.2546) loss: 0.6544 (0.6547) time: 0.1508 data: 0.0526 max mem: 9377 +Train: [85] [3600/6250] eta: 0:07:16 lr: 0.000007 grad: 0.1990 (0.2544) loss: 0.6576 (0.6548) time: 0.1651 data: 0.0750 max mem: 9377 +Train: [85] [3700/6250] eta: 0:06:59 lr: 0.000007 grad: 0.2046 (0.2539) loss: 0.6473 (0.6551) time: 0.1530 data: 0.0643 max mem: 9377 +Train: [85] [3800/6250] eta: 0:06:42 lr: 0.000007 grad: 0.1903 (0.2530) loss: 0.6806 (0.6554) time: 0.1475 data: 0.0551 max mem: 9377 +Train: [85] [3900/6250] eta: 0:06:26 lr: 0.000007 grad: 0.2012 (0.2534) loss: 0.6509 (0.6555) time: 0.1710 data: 0.0748 max mem: 9377 +Train: [85] [4000/6250] eta: 0:06:09 lr: 0.000007 grad: 0.1956 (0.2538) loss: 0.6654 (0.6556) time: 0.1881 data: 0.1015 max mem: 9377 +Train: [85] [4100/6250] eta: 0:05:52 lr: 0.000007 grad: 0.2017 (0.2537) loss: 0.6678 (0.6557) time: 0.1690 data: 0.0841 max mem: 9377 +Train: [85] [4200/6250] eta: 0:05:35 lr: 0.000007 grad: 0.2015 (0.2534) loss: 0.6512 (0.6558) time: 0.1603 data: 0.0796 max mem: 9377 +Train: [85] [4300/6250] eta: 0:05:19 lr: 0.000007 grad: 0.1955 (0.2539) loss: 0.6722 (0.6560) time: 0.1505 data: 0.0570 max mem: 9377 +Train: [85] [4400/6250] eta: 0:05:02 lr: 0.000007 grad: 0.2046 (0.2540) loss: 0.6526 (0.6563) time: 0.1094 data: 0.0039 max mem: 9377 +Train: [85] [4500/6250] eta: 0:04:46 lr: 0.000007 grad: 0.2057 (0.2534) loss: 0.6623 (0.6564) time: 0.1398 data: 0.0468 max mem: 9377 +Train: [85] [4600/6250] eta: 0:04:29 lr: 0.000007 grad: 0.1992 (0.2525) loss: 0.6634 (0.6567) time: 0.1602 data: 0.0756 max mem: 9377 +Train: [85] [4700/6250] eta: 0:04:13 lr: 0.000007 grad: 0.1986 (0.2524) loss: 0.6667 (0.6569) time: 0.1601 data: 0.0738 max mem: 9377 +Train: [85] [4800/6250] eta: 0:03:56 lr: 0.000007 grad: 0.2015 (0.2524) loss: 0.6545 (0.6571) time: 0.1709 data: 0.0874 max mem: 9377 +Train: [85] [4900/6250] eta: 0:03:40 lr: 0.000007 grad: 0.2049 (0.2524) loss: 0.6609 (0.6573) time: 0.1502 data: 0.0634 max mem: 9377 +Train: [85] [5000/6250] eta: 0:03:24 lr: 0.000007 grad: 0.2073 (0.2526) loss: 0.6640 (0.6574) time: 0.1746 data: 0.0931 max mem: 9377 +Train: [85] [5100/6250] eta: 0:03:08 lr: 0.000007 grad: 0.2011 (0.2529) loss: 0.6613 (0.6576) time: 0.1664 data: 0.0944 max mem: 9377 +Train: [85] [5200/6250] eta: 0:02:52 lr: 0.000007 grad: 0.2083 (0.2536) loss: 0.6713 (0.6577) time: 0.1751 data: 0.0893 max mem: 9377 +Train: [85] [5300/6250] eta: 0:02:36 lr: 0.000007 grad: 0.1989 (0.2539) loss: 0.6576 (0.6579) time: 0.1653 data: 0.0711 max mem: 9377 +Train: [85] [5400/6250] eta: 0:02:19 lr: 0.000007 grad: 0.2085 (0.2542) loss: 0.6638 (0.6580) time: 0.1877 data: 0.0960 max mem: 9377 +Train: [85] [5500/6250] eta: 0:02:03 lr: 0.000007 grad: 0.2036 (0.2544) loss: 0.6569 (0.6581) time: 0.1695 data: 0.0779 max mem: 9377 +Train: [85] [5600/6250] eta: 0:01:47 lr: 0.000007 grad: 0.2024 (0.2544) loss: 0.6663 (0.6583) time: 0.1746 data: 0.0814 max mem: 9377 +Train: [85] [5700/6250] eta: 0:01:30 lr: 0.000007 grad: 0.1962 (0.2539) loss: 0.6603 (0.6583) time: 0.1525 data: 0.0609 max mem: 9377 +Train: [85] [5800/6250] eta: 0:01:13 lr: 0.000007 grad: 0.2053 (0.2539) loss: 0.6584 (0.6583) time: 0.1461 data: 0.0589 max mem: 9377 +Train: [85] [5900/6250] eta: 0:00:57 lr: 0.000007 grad: 0.2068 (0.2543) loss: 0.6578 (0.6583) time: 0.1705 data: 0.0907 max mem: 9377 +Train: [85] [6000/6250] eta: 0:00:41 lr: 0.000007 grad: 0.2062 (0.2544) loss: 0.6583 (0.6583) time: 0.1570 data: 0.0714 max mem: 9377 +Train: [85] [6100/6250] eta: 0:00:24 lr: 0.000007 grad: 0.2156 (0.2547) loss: 0.6638 (0.6584) time: 0.1396 data: 0.0594 max mem: 9377 +Train: [85] [6200/6250] eta: 0:00:08 lr: 0.000007 grad: 0.1982 (0.2555) loss: 0.6564 (0.6585) time: 0.1663 data: 0.0767 max mem: 9377 +Train: [85] [6249/6250] eta: 0:00:00 lr: 0.000007 grad: 0.1978 (0.2553) loss: 0.6598 (0.6586) time: 0.1645 data: 0.0796 max mem: 9377 +Train: [85] Total time: 0:17:10 (0.1649 s / it) +Averaged stats: lr: 0.000007 grad: 0.1978 (0.2553) loss: 0.6598 (0.6586) +Eval (hcp-train-subset): [85] [ 0/62] eta: 0:05:30 loss: 0.8969 (0.8969) time: 5.3257 data: 5.2583 max mem: 9377 +Eval (hcp-train-subset): [85] [61/62] eta: 0:00:00 loss: 0.9108 (0.9102) time: 0.1269 data: 0.1017 max mem: 9377 +Eval (hcp-train-subset): [85] Total time: 0:00:14 (0.2387 s / it) +Averaged stats (hcp-train-subset): loss: 0.9108 (0.9102) +Eval (hcp-val): [85] [ 0/62] eta: 0:04:40 loss: 0.9118 (0.9118) time: 4.5304 data: 4.4308 max mem: 9377 +Eval (hcp-val): [85] [61/62] eta: 0:00:00 loss: 0.9106 (0.9106) time: 0.1281 data: 0.1028 max mem: 9377 +Eval (hcp-val): [85] Total time: 0:00:14 (0.2404 s / it) +Averaged stats (hcp-val): loss: 0.9106 (0.9106) +Eval (nsd-val): [85] [ 0/62] eta: 0:04:33 loss: 0.9133 (0.9133) time: 4.4036 data: 4.3261 max mem: 9377 +Eval (nsd-val): [85] [61/62] eta: 0:00:00 loss: 0.9146 (0.9164) time: 0.1312 data: 0.1051 max mem: 9377 +Eval (nsd-val): [85] Total time: 0:00:14 (0.2320 s / it) +Averaged stats (nsd-val): loss: 0.9146 (0.9164) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [86] [ 0/6250] eta: 9:59:54 lr: 0.000007 grad: 0.2116 (0.2116) loss: 0.7494 (0.7494) time: 5.7591 data: 5.4589 max mem: 9377 +Train: [86] [ 100/6250] eta: 0:23:09 lr: 0.000007 grad: 0.2206 (0.2542) loss: 0.6753 (0.6648) time: 0.1384 data: 0.0213 max mem: 9377 +Train: [86] [ 200/6250] eta: 0:19:29 lr: 0.000007 grad: 0.2111 (0.2670) loss: 0.6716 (0.6628) time: 0.1641 data: 0.0534 max mem: 9377 +Train: [86] [ 300/6250] eta: 0:18:11 lr: 0.000007 grad: 0.2097 (0.2641) loss: 0.6594 (0.6641) time: 0.1475 data: 0.0363 max mem: 9377 +Train: [86] [ 400/6250] eta: 0:17:14 lr: 0.000007 grad: 0.2023 (0.2723) loss: 0.6567 (0.6626) time: 0.1525 data: 0.0518 max mem: 9377 +Train: [86] [ 500/6250] eta: 0:16:30 lr: 0.000007 grad: 0.2038 (0.2688) loss: 0.6648 (0.6612) time: 0.1551 data: 0.0515 max mem: 9377 +Train: [86] [ 600/6250] eta: 0:15:53 lr: 0.000006 grad: 0.1968 (0.2672) loss: 0.6561 (0.6616) time: 0.1489 data: 0.0416 max mem: 9377 +Train: [86] [ 700/6250] eta: 0:15:30 lr: 0.000006 grad: 0.2035 (0.2621) loss: 0.6600 (0.6612) time: 0.1948 data: 0.1049 max mem: 9377 +Train: [86] [ 800/6250] eta: 0:15:03 lr: 0.000006 grad: 0.2073 (0.2674) loss: 0.6638 (0.6612) time: 0.1564 data: 0.0663 max mem: 9377 +Train: [86] [ 900/6250] eta: 0:14:45 lr: 0.000006 grad: 0.1978 (0.2646) loss: 0.6571 (0.6611) time: 0.1583 data: 0.0649 max mem: 9377 +Train: [86] [1000/6250] eta: 0:14:33 lr: 0.000006 grad: 0.2055 (0.2624) loss: 0.6503 (0.6609) time: 0.1757 data: 0.0924 max mem: 9377 +Train: [86] [1100/6250] eta: 0:14:22 lr: 0.000006 grad: 0.1940 (0.2613) loss: 0.6638 (0.6608) time: 0.1800 data: 0.0865 max mem: 9377 +Train: [86] [1200/6250] eta: 0:14:06 lr: 0.000006 grad: 0.1957 (0.2596) loss: 0.6711 (0.6606) time: 0.1657 data: 0.0855 max mem: 9377 +Train: [86] [1300/6250] eta: 0:13:49 lr: 0.000006 grad: 0.2019 (0.2591) loss: 0.6540 (0.6602) time: 0.2041 data: 0.1131 max mem: 9377 +Train: [86] [1400/6250] eta: 0:13:32 lr: 0.000006 grad: 0.2019 (0.2598) loss: 0.6570 (0.6600) time: 0.1707 data: 0.0789 max mem: 9377 +Train: [86] [1500/6250] eta: 0:13:18 lr: 0.000006 grad: 0.1999 (0.2590) loss: 0.6575 (0.6600) time: 0.1734 data: 0.0845 max mem: 9377 +Train: [86] [1600/6250] eta: 0:13:01 lr: 0.000006 grad: 0.2016 (0.2596) loss: 0.6554 (0.6601) time: 0.1696 data: 0.0782 max mem: 9377 +Train: [86] [1700/6250] eta: 0:12:43 lr: 0.000006 grad: 0.1993 (0.2586) loss: 0.6513 (0.6599) time: 0.1510 data: 0.0568 max mem: 9377 +Train: [86] [1800/6250] eta: 0:12:28 lr: 0.000006 grad: 0.2060 (0.2584) loss: 0.6473 (0.6597) time: 0.1944 data: 0.1062 max mem: 9377 +Train: [86] [1900/6250] eta: 0:12:08 lr: 0.000006 grad: 0.2016 (0.2610) loss: 0.6662 (0.6596) time: 0.1614 data: 0.0614 max mem: 9377 +Train: [86] [2000/6250] eta: 0:11:49 lr: 0.000006 grad: 0.2026 (0.2644) loss: 0.6582 (0.6593) time: 0.1655 data: 0.0799 max mem: 9377 +Train: [86] [2100/6250] eta: 0:11:30 lr: 0.000006 grad: 0.2235 (0.2642) loss: 0.6641 (0.6593) time: 0.1542 data: 0.0636 max mem: 9377 +Train: [86] [2200/6250] eta: 0:11:12 lr: 0.000006 grad: 0.2034 (0.2650) loss: 0.6627 (0.6590) time: 0.1563 data: 0.0703 max mem: 9377 +Train: [86] [2300/6250] eta: 0:10:54 lr: 0.000006 grad: 0.2077 (0.2651) loss: 0.6629 (0.6590) time: 0.1729 data: 0.0878 max mem: 9377 +Train: [86] [2400/6250] eta: 0:10:36 lr: 0.000006 grad: 0.2068 (0.2652) loss: 0.6666 (0.6591) time: 0.1677 data: 0.0776 max mem: 9377 +Train: [86] [2500/6250] eta: 0:10:18 lr: 0.000006 grad: 0.1976 (0.2647) loss: 0.6701 (0.6592) time: 0.1552 data: 0.0642 max mem: 9377 +Train: [86] [2600/6250] eta: 0:10:01 lr: 0.000006 grad: 0.2001 (0.2647) loss: 0.6636 (0.6593) time: 0.1945 data: 0.1122 max mem: 9377 +Train: [86] [2700/6250] eta: 0:09:43 lr: 0.000006 grad: 0.2049 (0.2656) loss: 0.6566 (0.6594) time: 0.1573 data: 0.0657 max mem: 9377 +Train: [86] [2800/6250] eta: 0:09:28 lr: 0.000006 grad: 0.1996 (0.2661) loss: 0.6686 (0.6596) time: 0.1656 data: 0.0688 max mem: 9377 +Train: [86] [2900/6250] eta: 0:09:12 lr: 0.000006 grad: 0.2479 (0.2671) loss: 0.6782 (0.6599) time: 0.1642 data: 0.0763 max mem: 9377 +Train: [86] [3000/6250] eta: 0:08:55 lr: 0.000006 grad: 0.2445 (0.2676) loss: 0.6603 (0.6600) time: 0.1651 data: 0.0792 max mem: 9377 +Train: [86] [3100/6250] eta: 0:08:40 lr: 0.000006 grad: 0.2007 (0.2679) loss: 0.6552 (0.6601) time: 0.2657 data: 0.1780 max mem: 9377 +Train: [86] [3200/6250] eta: 0:08:22 lr: 0.000006 grad: 0.2056 (0.2668) loss: 0.6545 (0.6601) time: 0.1654 data: 0.0733 max mem: 9377 +Train: [86] [3300/6250] eta: 0:08:05 lr: 0.000006 grad: 0.2013 (0.2665) loss: 0.6547 (0.6600) time: 0.1427 data: 0.0506 max mem: 9377 +Train: [86] [3400/6250] eta: 0:07:48 lr: 0.000006 grad: 0.2212 (0.2657) loss: 0.6590 (0.6602) time: 0.1301 data: 0.0396 max mem: 9377 +Train: [86] [3500/6250] eta: 0:07:31 lr: 0.000006 grad: 0.2054 (0.2656) loss: 0.6459 (0.6602) time: 0.1308 data: 0.0317 max mem: 9377 +Train: [86] [3600/6250] eta: 0:07:14 lr: 0.000006 grad: 0.2044 (0.2653) loss: 0.6665 (0.6601) time: 0.1506 data: 0.0621 max mem: 9377 +Train: [86] [3700/6250] eta: 0:06:56 lr: 0.000006 grad: 0.2048 (0.2654) loss: 0.6626 (0.6601) time: 0.1362 data: 0.0406 max mem: 9377 +Train: [86] [3800/6250] eta: 0:06:39 lr: 0.000006 grad: 0.2027 (0.2662) loss: 0.6598 (0.6602) time: 0.1471 data: 0.0490 max mem: 9377 +Train: [86] [3900/6250] eta: 0:06:22 lr: 0.000006 grad: 0.1956 (0.2655) loss: 0.6705 (0.6603) time: 0.1453 data: 0.0544 max mem: 9377 +Train: [86] [4000/6250] eta: 0:06:05 lr: 0.000006 grad: 0.2016 (0.2658) loss: 0.6641 (0.6605) time: 0.1418 data: 0.0605 max mem: 9377 +Train: [86] [4100/6250] eta: 0:05:48 lr: 0.000006 grad: 0.1942 (0.2651) loss: 0.6697 (0.6607) time: 0.1471 data: 0.0533 max mem: 9377 +Train: [86] [4200/6250] eta: 0:05:32 lr: 0.000006 grad: 0.1958 (0.2641) loss: 0.6680 (0.6609) time: 0.1360 data: 0.0425 max mem: 9377 +Train: [86] [4300/6250] eta: 0:05:15 lr: 0.000006 grad: 0.2022 (0.2638) loss: 0.6736 (0.6611) time: 0.1441 data: 0.0626 max mem: 9377 +Train: [86] [4400/6250] eta: 0:04:58 lr: 0.000006 grad: 0.1995 (0.2635) loss: 0.6683 (0.6613) time: 0.1446 data: 0.0580 max mem: 9377 +Train: [86] [4500/6250] eta: 0:04:42 lr: 0.000006 grad: 0.2004 (0.2632) loss: 0.6587 (0.6614) time: 0.1670 data: 0.0828 max mem: 9377 +Train: [86] [4600/6250] eta: 0:04:26 lr: 0.000006 grad: 0.2051 (0.2625) loss: 0.6583 (0.6614) time: 0.1601 data: 0.0709 max mem: 9377 +Train: [86] [4700/6250] eta: 0:04:09 lr: 0.000006 grad: 0.1945 (0.2617) loss: 0.6443 (0.6613) time: 0.1411 data: 0.0582 max mem: 9377 +Train: [86] [4800/6250] eta: 0:03:53 lr: 0.000006 grad: 0.1996 (0.2613) loss: 0.6479 (0.6613) time: 0.1898 data: 0.1042 max mem: 9377 +Train: [86] [4900/6250] eta: 0:03:37 lr: 0.000006 grad: 0.2006 (0.2608) loss: 0.6609 (0.6613) time: 0.1835 data: 0.0990 max mem: 9377 +Train: [86] [5000/6250] eta: 0:03:21 lr: 0.000006 grad: 0.1949 (0.2602) loss: 0.6560 (0.6612) time: 0.1758 data: 0.0877 max mem: 9377 +Train: [86] [5100/6250] eta: 0:03:05 lr: 0.000006 grad: 0.1968 (0.2595) loss: 0.6659 (0.6613) time: 0.1753 data: 0.0868 max mem: 9377 +Train: [86] [5200/6250] eta: 0:02:49 lr: 0.000006 grad: 0.2018 (0.2589) loss: 0.6418 (0.6613) time: 0.1746 data: 0.0858 max mem: 9377 +Train: [86] [5300/6250] eta: 0:02:33 lr: 0.000006 grad: 0.1941 (0.2591) loss: 0.6654 (0.6614) time: 0.1490 data: 0.0603 max mem: 9377 +Train: [86] [5400/6250] eta: 0:02:17 lr: 0.000006 grad: 0.1941 (0.2590) loss: 0.6627 (0.6613) time: 0.1522 data: 0.0598 max mem: 9377 +Train: [86] [5500/6250] eta: 0:02:01 lr: 0.000006 grad: 0.1956 (0.2586) loss: 0.6672 (0.6613) time: 0.1515 data: 0.0646 max mem: 9377 +Train: [86] [5600/6250] eta: 0:01:44 lr: 0.000006 grad: 0.1937 (0.2586) loss: 0.6536 (0.6613) time: 0.1312 data: 0.0435 max mem: 9377 +Train: [86] [5700/6250] eta: 0:01:28 lr: 0.000006 grad: 0.1981 (0.2580) loss: 0.6642 (0.6613) time: 0.1548 data: 0.0740 max mem: 9377 +Train: [86] [5800/6250] eta: 0:01:12 lr: 0.000006 grad: 0.1966 (0.2586) loss: 0.6693 (0.6614) time: 0.1409 data: 0.0465 max mem: 9377 +Train: [86] [5900/6250] eta: 0:00:56 lr: 0.000006 grad: 0.1922 (0.2581) loss: 0.6688 (0.6616) time: 0.1264 data: 0.0290 max mem: 9377 +Train: [86] [6000/6250] eta: 0:00:40 lr: 0.000006 grad: 0.2041 (0.2578) loss: 0.6644 (0.6617) time: 0.1646 data: 0.0784 max mem: 9377 +Train: [86] [6100/6250] eta: 0:00:24 lr: 0.000006 grad: 0.1902 (0.2580) loss: 0.6650 (0.6618) time: 0.1787 data: 0.0845 max mem: 9377 +Train: [86] [6200/6250] eta: 0:00:08 lr: 0.000006 grad: 0.1983 (0.2577) loss: 0.6588 (0.6618) time: 0.1415 data: 0.0545 max mem: 9377 +Train: [86] [6249/6250] eta: 0:00:00 lr: 0.000006 grad: 0.1926 (0.2573) loss: 0.6717 (0.6619) time: 0.1546 data: 0.0619 max mem: 9377 +Train: [86] Total time: 0:16:46 (0.1610 s / it) +Averaged stats: lr: 0.000006 grad: 0.1926 (0.2573) loss: 0.6717 (0.6619) +Eval (hcp-train-subset): [86] [ 0/62] eta: 0:05:32 loss: 0.8999 (0.8999) time: 5.3603 data: 5.3209 max mem: 9377 +Eval (hcp-train-subset): [86] [61/62] eta: 0:00:00 loss: 0.9094 (0.9110) time: 0.1361 data: 0.1092 max mem: 9377 +Eval (hcp-train-subset): [86] Total time: 0:00:14 (0.2362 s / it) +Averaged stats (hcp-train-subset): loss: 0.9094 (0.9110) +Eval (hcp-val): [86] [ 0/62] eta: 0:05:52 loss: 0.9163 (0.9163) time: 5.6778 data: 5.6227 max mem: 9377 +Eval (hcp-val): [86] [61/62] eta: 0:00:00 loss: 0.9100 (0.9106) time: 0.1456 data: 0.1203 max mem: 9377 +Eval (hcp-val): [86] Total time: 0:00:14 (0.2343 s / it) +Averaged stats (hcp-val): loss: 0.9100 (0.9106) +Eval (nsd-val): [86] [ 0/62] eta: 0:06:37 loss: 0.9178 (0.9178) time: 6.4058 data: 6.3749 max mem: 9377 +Eval (nsd-val): [86] [61/62] eta: 0:00:00 loss: 0.9167 (0.9214) time: 0.1442 data: 0.1189 max mem: 9377 +Eval (nsd-val): [86] Total time: 0:00:14 (0.2380 s / it) +Averaged stats (nsd-val): loss: 0.9167 (0.9214) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [87] [ 0/6250] eta: 9:43:01 lr: 0.000006 grad: 0.3153 (0.3153) loss: 0.5446 (0.5446) time: 5.5970 data: 5.3453 max mem: 9377 +Train: [87] [ 100/6250] eta: 0:23:07 lr: 0.000006 grad: 0.2181 (0.2622) loss: 0.6602 (0.6576) time: 0.1806 data: 0.0715 max mem: 9377 +Train: [87] [ 200/6250] eta: 0:20:35 lr: 0.000006 grad: 0.2060 (0.2575) loss: 0.6631 (0.6610) time: 0.1969 data: 0.0971 max mem: 9377 +Train: [87] [ 300/6250] eta: 0:19:00 lr: 0.000006 grad: 0.2075 (0.2520) loss: 0.6779 (0.6630) time: 0.1650 data: 0.0738 max mem: 9377 +Train: [87] [ 400/6250] eta: 0:18:11 lr: 0.000006 grad: 0.2070 (0.2504) loss: 0.6737 (0.6657) time: 0.1587 data: 0.0661 max mem: 9377 +Train: [87] [ 500/6250] eta: 0:17:23 lr: 0.000006 grad: 0.1974 (0.2548) loss: 0.6460 (0.6648) time: 0.1650 data: 0.0736 max mem: 9377 +Train: [87] [ 600/6250] eta: 0:16:51 lr: 0.000006 grad: 0.2006 (0.2510) loss: 0.6440 (0.6637) time: 0.1533 data: 0.0514 max mem: 9377 +Train: [87] [ 700/6250] eta: 0:16:23 lr: 0.000006 grad: 0.1987 (0.2564) loss: 0.6616 (0.6628) time: 0.1788 data: 0.0894 max mem: 9377 +Train: [87] [ 800/6250] eta: 0:15:55 lr: 0.000006 grad: 0.1950 (0.2595) loss: 0.6642 (0.6627) time: 0.1384 data: 0.0285 max mem: 9377 +Train: [87] [ 900/6250] eta: 0:15:36 lr: 0.000006 grad: 0.2044 (0.2596) loss: 0.6593 (0.6623) time: 0.1775 data: 0.0867 max mem: 9377 +Train: [87] [1000/6250] eta: 0:15:27 lr: 0.000006 grad: 0.2024 (0.2578) loss: 0.6563 (0.6616) time: 0.1953 data: 0.1143 max mem: 9377 +Train: [87] [1100/6250] eta: 0:15:02 lr: 0.000006 grad: 0.1965 (0.2567) loss: 0.6571 (0.6611) time: 0.1559 data: 0.0792 max mem: 9377 +Train: [87] [1200/6250] eta: 0:14:39 lr: 0.000006 grad: 0.2032 (0.2558) loss: 0.6501 (0.6608) time: 0.1625 data: 0.0648 max mem: 9377 +Train: [87] [1300/6250] eta: 0:14:12 lr: 0.000006 grad: 0.1959 (0.2540) loss: 0.6645 (0.6605) time: 0.1403 data: 0.0579 max mem: 9377 +Train: [87] [1400/6250] eta: 0:13:51 lr: 0.000005 grad: 0.2086 (0.2546) loss: 0.6493 (0.6606) time: 0.1533 data: 0.0670 max mem: 9377 +Train: [87] [1500/6250] eta: 0:13:32 lr: 0.000005 grad: 0.2088 (0.2548) loss: 0.6550 (0.6605) time: 0.1546 data: 0.0620 max mem: 9377 +Train: [87] [1600/6250] eta: 0:13:12 lr: 0.000005 grad: 0.1972 (0.2536) loss: 0.6673 (0.6603) time: 0.1562 data: 0.0659 max mem: 9377 +Train: [87] [1700/6250] eta: 0:12:49 lr: 0.000005 grad: 0.1994 (0.2532) loss: 0.6648 (0.6603) time: 0.1521 data: 0.0509 max mem: 9377 +Train: [87] [1800/6250] eta: 0:12:29 lr: 0.000005 grad: 0.2000 (0.2525) loss: 0.6589 (0.6603) time: 0.1553 data: 0.0657 max mem: 9377 +Train: [87] [1900/6250] eta: 0:12:07 lr: 0.000005 grad: 0.1943 (0.2528) loss: 0.6682 (0.6606) time: 0.1671 data: 0.0795 max mem: 9377 +Train: [87] [2000/6250] eta: 0:11:47 lr: 0.000005 grad: 0.1945 (0.2527) loss: 0.6626 (0.6607) time: 0.1609 data: 0.0702 max mem: 9377 +Train: [87] [2100/6250] eta: 0:11:27 lr: 0.000005 grad: 0.2040 (0.2524) loss: 0.6538 (0.6609) time: 0.1463 data: 0.0583 max mem: 9377 +Train: [87] [2200/6250] eta: 0:11:08 lr: 0.000005 grad: 0.1943 (0.2510) loss: 0.6690 (0.6611) time: 0.1507 data: 0.0624 max mem: 9377 +Train: [87] [2300/6250] eta: 0:10:50 lr: 0.000005 grad: 0.1957 (0.2524) loss: 0.6565 (0.6611) time: 0.1584 data: 0.0743 max mem: 9377 +Train: [87] [2400/6250] eta: 0:10:32 lr: 0.000005 grad: 0.1926 (0.2527) loss: 0.6696 (0.6613) time: 0.1614 data: 0.0675 max mem: 9377 +Train: [87] [2500/6250] eta: 0:10:14 lr: 0.000005 grad: 0.1999 (0.2534) loss: 0.6630 (0.6616) time: 0.1578 data: 0.0782 max mem: 9377 +Train: [87] [2600/6250] eta: 0:09:56 lr: 0.000005 grad: 0.1967 (0.2531) loss: 0.6605 (0.6616) time: 0.1507 data: 0.0659 max mem: 9377 +Train: [87] [2700/6250] eta: 0:09:39 lr: 0.000005 grad: 0.2008 (0.2525) loss: 0.6714 (0.6615) time: 0.1627 data: 0.0792 max mem: 9377 +Train: [87] [2800/6250] eta: 0:09:23 lr: 0.000005 grad: 0.1943 (0.2512) loss: 0.6589 (0.6617) time: 0.1472 data: 0.0618 max mem: 9377 +Train: [87] [2900/6250] eta: 0:09:07 lr: 0.000005 grad: 0.1938 (0.2511) loss: 0.6581 (0.6619) time: 0.1955 data: 0.1092 max mem: 9377 +Train: [87] [3000/6250] eta: 0:08:50 lr: 0.000005 grad: 0.2018 (0.2511) loss: 0.6638 (0.6619) time: 0.1644 data: 0.0788 max mem: 9377 +Train: [87] [3100/6250] eta: 0:08:32 lr: 0.000005 grad: 0.1915 (0.2514) loss: 0.6724 (0.6622) time: 0.1613 data: 0.0770 max mem: 9377 +Train: [87] [3200/6250] eta: 0:08:16 lr: 0.000005 grad: 0.1958 (0.2513) loss: 0.6728 (0.6624) time: 0.1503 data: 0.0566 max mem: 9377 +Train: [87] [3300/6250] eta: 0:08:01 lr: 0.000005 grad: 0.1985 (0.2509) loss: 0.6645 (0.6626) time: 0.1915 data: 0.1022 max mem: 9377 +Train: [87] [3400/6250] eta: 0:07:44 lr: 0.000005 grad: 0.1968 (0.2503) loss: 0.6731 (0.6627) time: 0.1353 data: 0.0432 max mem: 9377 +Train: [87] [3500/6250] eta: 0:07:27 lr: 0.000005 grad: 0.1891 (0.2500) loss: 0.6683 (0.6629) time: 0.1721 data: 0.0826 max mem: 9377 +Train: [87] [3600/6250] eta: 0:07:11 lr: 0.000005 grad: 0.1904 (0.2504) loss: 0.6646 (0.6630) time: 0.1430 data: 0.0587 max mem: 9377 +Train: [87] [3700/6250] eta: 0:06:54 lr: 0.000005 grad: 0.2024 (0.2501) loss: 0.6736 (0.6632) time: 0.1542 data: 0.0544 max mem: 9377 +Train: [87] [3800/6250] eta: 0:06:38 lr: 0.000005 grad: 0.2011 (0.2503) loss: 0.6698 (0.6634) time: 0.1714 data: 0.0881 max mem: 9377 +Train: [87] [3900/6250] eta: 0:06:21 lr: 0.000005 grad: 0.1933 (0.2496) loss: 0.6611 (0.6635) time: 0.1846 data: 0.0923 max mem: 9377 +Train: [87] [4000/6250] eta: 0:06:04 lr: 0.000005 grad: 0.1984 (0.2493) loss: 0.6637 (0.6635) time: 0.1547 data: 0.0702 max mem: 9377 +Train: [87] [4100/6250] eta: 0:05:47 lr: 0.000005 grad: 0.1895 (0.2492) loss: 0.6622 (0.6635) time: 0.1559 data: 0.0681 max mem: 9377 +Train: [87] [4200/6250] eta: 0:05:31 lr: 0.000005 grad: 0.2061 (0.2489) loss: 0.6779 (0.6637) time: 0.1551 data: 0.0685 max mem: 9377 +Train: [87] [4300/6250] eta: 0:05:15 lr: 0.000005 grad: 0.1976 (0.2485) loss: 0.6658 (0.6638) time: 0.1711 data: 0.0853 max mem: 9377 +Train: [87] [4400/6250] eta: 0:04:58 lr: 0.000005 grad: 0.1876 (0.2482) loss: 0.6712 (0.6639) time: 0.1588 data: 0.0686 max mem: 9377 +Train: [87] [4500/6250] eta: 0:04:42 lr: 0.000005 grad: 0.1954 (0.2480) loss: 0.6738 (0.6641) time: 0.1467 data: 0.0568 max mem: 9377 +Train: [87] [4600/6250] eta: 0:04:26 lr: 0.000005 grad: 0.2020 (0.2477) loss: 0.6606 (0.6642) time: 0.1433 data: 0.0580 max mem: 9377 +Train: [87] [4700/6250] eta: 0:04:09 lr: 0.000005 grad: 0.1994 (0.2476) loss: 0.6619 (0.6642) time: 0.1480 data: 0.0608 max mem: 9377 +Train: [87] [4800/6250] eta: 0:03:53 lr: 0.000005 grad: 0.1956 (0.2476) loss: 0.6552 (0.6643) time: 0.1554 data: 0.0680 max mem: 9377 +Train: [87] [4900/6250] eta: 0:03:37 lr: 0.000005 grad: 0.1892 (0.2472) loss: 0.6740 (0.6644) time: 0.2192 data: 0.1436 max mem: 9377 +Train: [87] [5000/6250] eta: 0:03:21 lr: 0.000005 grad: 0.1980 (0.2471) loss: 0.6730 (0.6645) time: 0.1538 data: 0.0721 max mem: 9377 +Train: [87] [5100/6250] eta: 0:03:05 lr: 0.000005 grad: 0.1965 (0.2468) loss: 0.6687 (0.6647) time: 0.1460 data: 0.0605 max mem: 9377 +Train: [87] [5200/6250] eta: 0:02:49 lr: 0.000005 grad: 0.2021 (0.2470) loss: 0.6655 (0.6647) time: 0.1488 data: 0.0682 max mem: 9377 +Train: [87] [5300/6250] eta: 0:02:33 lr: 0.000005 grad: 0.1970 (0.2476) loss: 0.6693 (0.6649) time: 0.1596 data: 0.0735 max mem: 9377 +Train: [87] [5400/6250] eta: 0:02:17 lr: 0.000005 grad: 0.1957 (0.2474) loss: 0.6656 (0.6649) time: 0.1783 data: 0.0862 max mem: 9377 +Train: [87] [5500/6250] eta: 0:02:01 lr: 0.000005 grad: 0.1961 (0.2471) loss: 0.6707 (0.6650) time: 0.1690 data: 0.0788 max mem: 9377 +Train: [87] [5600/6250] eta: 0:01:44 lr: 0.000005 grad: 0.1976 (0.2469) loss: 0.6763 (0.6651) time: 0.1550 data: 0.0669 max mem: 9377 +Train: [87] [5700/6250] eta: 0:01:28 lr: 0.000005 grad: 0.1953 (0.2470) loss: 0.6645 (0.6652) time: 0.1727 data: 0.0907 max mem: 9377 +Train: [87] [5800/6250] eta: 0:01:12 lr: 0.000005 grad: 0.1966 (0.2467) loss: 0.6691 (0.6652) time: 0.1562 data: 0.0691 max mem: 9377 +Train: [87] [5900/6250] eta: 0:00:56 lr: 0.000005 grad: 0.1986 (0.2466) loss: 0.6566 (0.6652) time: 0.1677 data: 0.0781 max mem: 9377 +Train: [87] [6000/6250] eta: 0:00:40 lr: 0.000005 grad: 0.1891 (0.2466) loss: 0.6654 (0.6651) time: 0.1703 data: 0.0857 max mem: 9377 +Train: [87] [6100/6250] eta: 0:00:24 lr: 0.000005 grad: 0.1950 (0.2468) loss: 0.6593 (0.6651) time: 0.1430 data: 0.0564 max mem: 9377 +Train: [87] [6200/6250] eta: 0:00:08 lr: 0.000005 grad: 0.1961 (0.2470) loss: 0.6639 (0.6652) time: 0.1360 data: 0.0493 max mem: 9377 +Train: [87] [6249/6250] eta: 0:00:00 lr: 0.000005 grad: 0.1982 (0.2467) loss: 0.6644 (0.6652) time: 0.1848 data: 0.1019 max mem: 9377 +Train: [87] Total time: 0:16:48 (0.1614 s / it) +Averaged stats: lr: 0.000005 grad: 0.1982 (0.2467) loss: 0.6644 (0.6652) +Eval (hcp-train-subset): [87] [ 0/62] eta: 0:04:37 loss: 0.8981 (0.8981) time: 4.4735 data: 4.3750 max mem: 9377 +Eval (hcp-train-subset): [87] [61/62] eta: 0:00:00 loss: 0.9098 (0.9111) time: 0.1292 data: 0.1043 max mem: 9377 +Eval (hcp-train-subset): [87] Total time: 0:00:15 (0.2450 s / it) +Averaged stats (hcp-train-subset): loss: 0.9098 (0.9111) +Eval (hcp-val): [87] [ 0/62] eta: 0:06:19 loss: 0.9135 (0.9135) time: 6.1137 data: 6.0739 max mem: 9377 +Eval (hcp-val): [87] [61/62] eta: 0:00:00 loss: 0.9097 (0.9109) time: 0.1591 data: 0.1339 max mem: 9377 +Eval (hcp-val): [87] Total time: 0:00:16 (0.2607 s / it) +Averaged stats (hcp-val): loss: 0.9097 (0.9109) +Eval (nsd-val): [87] [ 0/62] eta: 0:05:57 loss: 0.9193 (0.9193) time: 5.7586 data: 5.7274 max mem: 9377 +Eval (nsd-val): [87] [61/62] eta: 0:00:00 loss: 0.9135 (0.9178) time: 0.1517 data: 0.1255 max mem: 9377 +Eval (nsd-val): [87] Total time: 0:00:16 (0.2660 s / it) +Averaged stats (nsd-val): loss: 0.9135 (0.9178) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [88] [ 0/6250] eta: 13:28:52 lr: 0.000005 grad: 0.2304 (0.2304) loss: 0.7159 (0.7159) time: 7.7652 data: 7.6230 max mem: 9377 +Train: [88] [ 100/6250] eta: 0:26:14 lr: 0.000005 grad: 0.2309 (0.2669) loss: 0.6075 (0.6371) time: 0.1971 data: 0.0811 max mem: 9377 +Train: [88] [ 200/6250] eta: 0:23:04 lr: 0.000005 grad: 0.2187 (0.2706) loss: 0.6537 (0.6377) time: 0.2283 data: 0.1201 max mem: 9377 +Train: [88] [ 300/6250] eta: 0:21:04 lr: 0.000005 grad: 0.2095 (0.2540) loss: 0.6691 (0.6442) time: 0.1750 data: 0.0823 max mem: 9377 +Train: [88] [ 400/6250] eta: 0:19:32 lr: 0.000005 grad: 0.1994 (0.2462) loss: 0.6719 (0.6497) time: 0.1532 data: 0.0567 max mem: 9377 +Train: [88] [ 500/6250] eta: 0:18:44 lr: 0.000005 grad: 0.2103 (0.2416) loss: 0.6466 (0.6528) time: 0.1721 data: 0.0705 max mem: 9377 +Train: [88] [ 600/6250] eta: 0:17:53 lr: 0.000005 grad: 0.2019 (0.2441) loss: 0.6598 (0.6553) time: 0.1780 data: 0.0826 max mem: 9377 +Train: [88] [ 700/6250] eta: 0:17:18 lr: 0.000005 grad: 0.2016 (0.2473) loss: 0.6525 (0.6561) time: 0.1971 data: 0.1082 max mem: 9377 +Train: [88] [ 800/6250] eta: 0:16:47 lr: 0.000005 grad: 0.2025 (0.2510) loss: 0.6687 (0.6574) time: 0.1323 data: 0.0376 max mem: 9377 +Train: [88] [ 900/6250] eta: 0:16:17 lr: 0.000005 grad: 0.1981 (0.2486) loss: 0.6654 (0.6574) time: 0.1493 data: 0.0520 max mem: 9377 +Train: [88] [1000/6250] eta: 0:15:50 lr: 0.000005 grad: 0.1946 (0.2467) loss: 0.6450 (0.6573) time: 0.1618 data: 0.0812 max mem: 9377 +Train: [88] [1100/6250] eta: 0:15:25 lr: 0.000005 grad: 0.2090 (0.2471) loss: 0.6485 (0.6568) time: 0.1689 data: 0.0763 max mem: 9377 +Train: [88] [1200/6250] eta: 0:15:00 lr: 0.000005 grad: 0.2041 (0.2487) loss: 0.6489 (0.6566) time: 0.1676 data: 0.0806 max mem: 9377 +Train: [88] [1300/6250] eta: 0:14:37 lr: 0.000005 grad: 0.1956 (0.2489) loss: 0.6698 (0.6565) time: 0.1623 data: 0.0758 max mem: 9377 +Train: [88] [1400/6250] eta: 0:14:21 lr: 0.000005 grad: 0.2032 (0.2501) loss: 0.6513 (0.6563) time: 0.1693 data: 0.0811 max mem: 9377 +Train: [88] [1500/6250] eta: 0:13:58 lr: 0.000005 grad: 0.1978 (0.2499) loss: 0.6665 (0.6563) time: 0.1643 data: 0.0645 max mem: 9377 +Train: [88] [1600/6250] eta: 0:13:37 lr: 0.000005 grad: 0.1984 (0.2489) loss: 0.6626 (0.6564) time: 0.1776 data: 0.0910 max mem: 9377 +Train: [88] [1700/6250] eta: 0:13:13 lr: 0.000005 grad: 0.2002 (0.2485) loss: 0.6511 (0.6564) time: 0.1781 data: 0.0923 max mem: 9377 +Train: [88] [1800/6250] eta: 0:12:49 lr: 0.000005 grad: 0.1969 (0.2481) loss: 0.6626 (0.6567) time: 0.1422 data: 0.0514 max mem: 9377 +Train: [88] [1900/6250] eta: 0:12:27 lr: 0.000005 grad: 0.1993 (0.2477) loss: 0.6547 (0.6568) time: 0.1400 data: 0.0618 max mem: 9377 +Train: [88] [2000/6250] eta: 0:12:05 lr: 0.000005 grad: 0.1948 (0.2481) loss: 0.6694 (0.6570) time: 0.1483 data: 0.0576 max mem: 9377 +Train: [88] [2100/6250] eta: 0:11:45 lr: 0.000005 grad: 0.2046 (0.2483) loss: 0.6498 (0.6574) time: 0.1524 data: 0.0671 max mem: 9377 +Train: [88] [2200/6250] eta: 0:11:24 lr: 0.000005 grad: 0.1968 (0.2474) loss: 0.6565 (0.6578) time: 0.1424 data: 0.0586 max mem: 9377 +Train: [88] [2300/6250] eta: 0:11:05 lr: 0.000005 grad: 0.2001 (0.2483) loss: 0.6643 (0.6581) time: 0.1417 data: 0.0587 max mem: 9377 +Train: [88] [2400/6250] eta: 0:10:46 lr: 0.000005 grad: 0.1945 (0.2470) loss: 0.6601 (0.6583) time: 0.1777 data: 0.0878 max mem: 9377 +Train: [88] [2500/6250] eta: 0:10:26 lr: 0.000005 grad: 0.1965 (0.2467) loss: 0.6578 (0.6586) time: 0.1309 data: 0.0474 max mem: 9377 +Train: [88] [2600/6250] eta: 0:10:08 lr: 0.000005 grad: 0.1971 (0.2469) loss: 0.6631 (0.6587) time: 0.1632 data: 0.0819 max mem: 9377 +Train: [88] [2700/6250] eta: 0:09:50 lr: 0.000005 grad: 0.1925 (0.2458) loss: 0.6711 (0.6590) time: 0.1716 data: 0.0838 max mem: 9377 +Train: [88] [2800/6250] eta: 0:09:32 lr: 0.000005 grad: 0.1971 (0.2453) loss: 0.6620 (0.6591) time: 0.1597 data: 0.0767 max mem: 9377 +Train: [88] [2900/6250] eta: 0:09:17 lr: 0.000004 grad: 0.1992 (0.2446) loss: 0.6661 (0.6592) time: 0.1517 data: 0.0606 max mem: 9377 +Train: [88] [3000/6250] eta: 0:09:02 lr: 0.000004 grad: 0.1975 (0.2437) loss: 0.6593 (0.6593) time: 0.1767 data: 0.0944 max mem: 9377 +Train: [88] [3100/6250] eta: 0:08:45 lr: 0.000004 grad: 0.1951 (0.2432) loss: 0.6635 (0.6594) time: 0.1502 data: 0.0717 max mem: 9377 +Train: [88] [3200/6250] eta: 0:08:30 lr: 0.000004 grad: 0.1924 (0.2427) loss: 0.6715 (0.6594) time: 0.1719 data: 0.0827 max mem: 9377 +Train: [88] [3300/6250] eta: 0:08:13 lr: 0.000004 grad: 0.2094 (0.2431) loss: 0.6582 (0.6593) time: 0.1512 data: 0.0722 max mem: 9377 +Train: [88] [3400/6250] eta: 0:07:56 lr: 0.000004 grad: 0.1979 (0.2430) loss: 0.6552 (0.6591) time: 0.1612 data: 0.0638 max mem: 9377 +Train: [88] [3500/6250] eta: 0:07:39 lr: 0.000004 grad: 0.1949 (0.2428) loss: 0.6566 (0.6591) time: 0.1586 data: 0.0744 max mem: 9377 +Train: [88] [3600/6250] eta: 0:07:22 lr: 0.000004 grad: 0.1953 (0.2422) loss: 0.6582 (0.6591) time: 0.1679 data: 0.0810 max mem: 9377 +Train: [88] [3700/6250] eta: 0:07:06 lr: 0.000004 grad: 0.2097 (0.2420) loss: 0.6592 (0.6591) time: 0.2021 data: 0.1166 max mem: 9377 +Train: [88] [3800/6250] eta: 0:06:48 lr: 0.000004 grad: 0.1932 (0.2414) loss: 0.6467 (0.6591) time: 0.1590 data: 0.0662 max mem: 9377 +Train: [88] [3900/6250] eta: 0:06:30 lr: 0.000004 grad: 0.1933 (0.2412) loss: 0.6725 (0.6592) time: 0.1363 data: 0.0388 max mem: 9377 +Train: [88] [4000/6250] eta: 0:06:12 lr: 0.000004 grad: 0.1944 (0.2410) loss: 0.6523 (0.6592) time: 0.1475 data: 0.0596 max mem: 9377 +Train: [88] [4100/6250] eta: 0:05:55 lr: 0.000004 grad: 0.1998 (0.2411) loss: 0.6538 (0.6592) time: 0.1316 data: 0.0436 max mem: 9377 +Train: [88] [4200/6250] eta: 0:05:38 lr: 0.000004 grad: 0.1971 (0.2413) loss: 0.6510 (0.6591) time: 0.1481 data: 0.0545 max mem: 9377 +Train: [88] [4300/6250] eta: 0:05:21 lr: 0.000004 grad: 0.1992 (0.2411) loss: 0.6767 (0.6592) time: 0.1452 data: 0.0549 max mem: 9377 +Train: [88] [4400/6250] eta: 0:05:04 lr: 0.000004 grad: 0.1988 (0.2408) loss: 0.6519 (0.6591) time: 0.1508 data: 0.0598 max mem: 9377 +Train: [88] [4500/6250] eta: 0:04:47 lr: 0.000004 grad: 0.1978 (0.2404) loss: 0.6459 (0.6592) time: 0.1556 data: 0.0727 max mem: 9377 +Train: [88] [4600/6250] eta: 0:04:31 lr: 0.000004 grad: 0.1920 (0.2403) loss: 0.6534 (0.6592) time: 0.1602 data: 0.0703 max mem: 9377 +Train: [88] [4700/6250] eta: 0:04:15 lr: 0.000004 grad: 0.1979 (0.2411) loss: 0.6625 (0.6592) time: 0.1362 data: 0.0500 max mem: 9377 +Train: [88] [4800/6250] eta: 0:03:58 lr: 0.000004 grad: 0.1986 (0.2422) loss: 0.6562 (0.6592) time: 0.1761 data: 0.0916 max mem: 9377 +Train: [88] [4900/6250] eta: 0:03:42 lr: 0.000004 grad: 0.1975 (0.2418) loss: 0.6583 (0.6592) time: 0.1819 data: 0.0986 max mem: 9377 +Train: [88] [5000/6250] eta: 0:03:26 lr: 0.000004 grad: 0.1956 (0.2419) loss: 0.6658 (0.6593) time: 0.1683 data: 0.0867 max mem: 9377 +Train: [88] [5100/6250] eta: 0:03:09 lr: 0.000004 grad: 0.1924 (0.2419) loss: 0.6693 (0.6594) time: 0.1586 data: 0.0691 max mem: 9377 +Train: [88] [5200/6250] eta: 0:02:53 lr: 0.000004 grad: 0.1963 (0.2420) loss: 0.6526 (0.6594) time: 0.1787 data: 0.0888 max mem: 9377 +Train: [88] [5300/6250] eta: 0:02:36 lr: 0.000004 grad: 0.1955 (0.2417) loss: 0.6644 (0.6595) time: 0.1628 data: 0.0812 max mem: 9377 +Train: [88] [5400/6250] eta: 0:02:20 lr: 0.000004 grad: 0.1957 (0.2415) loss: 0.6621 (0.6596) time: 0.1718 data: 0.0796 max mem: 9377 +Train: [88] [5500/6250] eta: 0:02:03 lr: 0.000004 grad: 0.1905 (0.2415) loss: 0.6732 (0.6598) time: 0.1470 data: 0.0503 max mem: 9377 +Train: [88] [5600/6250] eta: 0:01:47 lr: 0.000004 grad: 0.1936 (0.2414) loss: 0.6743 (0.6599) time: 0.1591 data: 0.0732 max mem: 9377 +Train: [88] [5700/6250] eta: 0:01:30 lr: 0.000004 grad: 0.1985 (0.2412) loss: 0.6714 (0.6601) time: 0.1699 data: 0.0826 max mem: 9377 +Train: [88] [5800/6250] eta: 0:01:14 lr: 0.000004 grad: 0.2015 (0.2414) loss: 0.6589 (0.6602) time: 0.1373 data: 0.0437 max mem: 9377 +Train: [88] [5900/6250] eta: 0:00:57 lr: 0.000004 grad: 0.1913 (0.2413) loss: 0.6830 (0.6604) time: 0.1469 data: 0.0523 max mem: 9377 +Train: [88] [6000/6250] eta: 0:00:41 lr: 0.000004 grad: 0.1917 (0.2410) loss: 0.6676 (0.6606) time: 0.1732 data: 0.0850 max mem: 9377 +Train: [88] [6100/6250] eta: 0:00:24 lr: 0.000004 grad: 0.1908 (0.2412) loss: 0.6789 (0.6608) time: 0.1615 data: 0.0766 max mem: 9377 +Train: [88] [6200/6250] eta: 0:00:08 lr: 0.000004 grad: 0.1995 (0.2411) loss: 0.6738 (0.6610) time: 0.1480 data: 0.0699 max mem: 9377 +Train: [88] [6249/6250] eta: 0:00:00 lr: 0.000004 grad: 0.1952 (0.2408) loss: 0.6628 (0.6611) time: 0.1758 data: 0.0890 max mem: 9377 +Train: [88] Total time: 0:17:11 (0.1650 s / it) +Averaged stats: lr: 0.000004 grad: 0.1952 (0.2408) loss: 0.6628 (0.6611) +Eval (hcp-train-subset): [88] [ 0/62] eta: 0:04:14 loss: 0.9006 (0.9006) time: 4.1048 data: 4.0095 max mem: 9377 +Eval (hcp-train-subset): [88] [61/62] eta: 0:00:00 loss: 0.9101 (0.9118) time: 0.1416 data: 0.1164 max mem: 9377 +Eval (hcp-train-subset): [88] Total time: 0:00:15 (0.2532 s / it) +Averaged stats (hcp-train-subset): loss: 0.9101 (0.9118) +Eval (hcp-val): [88] [ 0/62] eta: 0:04:19 loss: 0.9107 (0.9107) time: 4.1777 data: 4.0680 max mem: 9377 +Eval (hcp-val): [88] [61/62] eta: 0:00:00 loss: 0.9090 (0.9124) time: 0.1324 data: 0.1070 max mem: 9377 +Eval (hcp-val): [88] Total time: 0:00:14 (0.2312 s / it) +Averaged stats (hcp-val): loss: 0.9090 (0.9124) +Eval (nsd-val): [88] [ 0/62] eta: 0:05:58 loss: 0.9154 (0.9154) time: 5.7869 data: 5.7544 max mem: 9377 +Eval (nsd-val): [88] [61/62] eta: 0:00:00 loss: 0.9113 (0.9147) time: 0.1361 data: 0.1091 max mem: 9377 +Eval (nsd-val): [88] Total time: 0:00:14 (0.2355 s / it) +Averaged stats (nsd-val): loss: 0.9113 (0.9147) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [89] [ 0/6250] eta: 10:32:44 lr: 0.000004 grad: 0.2104 (0.2104) loss: 0.7341 (0.7341) time: 6.0744 data: 5.8027 max mem: 9377 +Train: [89] [ 100/6250] eta: 0:23:28 lr: 0.000004 grad: 0.2179 (0.2468) loss: 0.6263 (0.6446) time: 0.1824 data: 0.0712 max mem: 9377 +Train: [89] [ 200/6250] eta: 0:20:07 lr: 0.000004 grad: 0.2028 (0.2423) loss: 0.6608 (0.6490) time: 0.1834 data: 0.0916 max mem: 9377 +Train: [89] [ 300/6250] eta: 0:18:58 lr: 0.000004 grad: 0.1998 (0.2400) loss: 0.6581 (0.6559) time: 0.1566 data: 0.0617 max mem: 9377 +Train: [89] [ 400/6250] eta: 0:18:03 lr: 0.000004 grad: 0.2037 (0.2399) loss: 0.6609 (0.6574) time: 0.1722 data: 0.0698 max mem: 9377 +Train: [89] [ 500/6250] eta: 0:17:23 lr: 0.000004 grad: 0.1984 (0.2435) loss: 0.6665 (0.6580) time: 0.1752 data: 0.0685 max mem: 9377 +Train: [89] [ 600/6250] eta: 0:16:44 lr: 0.000004 grad: 0.2037 (0.2501) loss: 0.6617 (0.6584) time: 0.1611 data: 0.0630 max mem: 9377 +Train: [89] [ 700/6250] eta: 0:16:09 lr: 0.000004 grad: 0.1974 (0.2506) loss: 0.6578 (0.6593) time: 0.1489 data: 0.0550 max mem: 9377 +Train: [89] [ 800/6250] eta: 0:15:39 lr: 0.000004 grad: 0.1961 (0.2524) loss: 0.6735 (0.6589) time: 0.1225 data: 0.0229 max mem: 9377 +Train: [89] [ 900/6250] eta: 0:15:17 lr: 0.000004 grad: 0.1962 (0.2494) loss: 0.6495 (0.6580) time: 0.1601 data: 0.0709 max mem: 9377 +Train: [89] [1000/6250] eta: 0:15:01 lr: 0.000004 grad: 0.1939 (0.2522) loss: 0.6624 (0.6577) time: 0.1518 data: 0.0660 max mem: 9377 +Train: [89] [1100/6250] eta: 0:14:40 lr: 0.000004 grad: 0.2023 (0.2512) loss: 0.6431 (0.6574) time: 0.1583 data: 0.0707 max mem: 9377 +Train: [89] [1200/6250] eta: 0:14:19 lr: 0.000004 grad: 0.1960 (0.2494) loss: 0.6543 (0.6573) time: 0.1533 data: 0.0584 max mem: 9377 +Train: [89] [1300/6250] eta: 0:13:56 lr: 0.000004 grad: 0.1999 (0.2502) loss: 0.6488 (0.6572) time: 0.1487 data: 0.0711 max mem: 9377 +Train: [89] [1400/6250] eta: 0:13:40 lr: 0.000004 grad: 0.1956 (0.2480) loss: 0.6562 (0.6573) time: 0.1513 data: 0.0592 max mem: 9377 +Train: [89] [1500/6250] eta: 0:13:24 lr: 0.000004 grad: 0.1974 (0.2467) loss: 0.6420 (0.6570) time: 0.1577 data: 0.0663 max mem: 9377 +Train: [89] [1600/6250] eta: 0:13:02 lr: 0.000004 grad: 0.1943 (0.2475) loss: 0.6637 (0.6569) time: 0.1404 data: 0.0441 max mem: 9377 +Train: [89] [1700/6250] eta: 0:12:41 lr: 0.000004 grad: 0.1935 (0.2460) loss: 0.6621 (0.6565) time: 0.1602 data: 0.0810 max mem: 9377 +Train: [89] [1800/6250] eta: 0:12:19 lr: 0.000004 grad: 0.1952 (0.2451) loss: 0.6640 (0.6566) time: 0.1387 data: 0.0512 max mem: 9377 +Train: [89] [1900/6250] eta: 0:11:58 lr: 0.000004 grad: 0.1938 (0.2446) loss: 0.6637 (0.6564) time: 0.1352 data: 0.0372 max mem: 9377 +Train: [89] [2000/6250] eta: 0:11:37 lr: 0.000004 grad: 0.1974 (0.2442) loss: 0.6660 (0.6565) time: 0.1409 data: 0.0560 max mem: 9377 +Train: [89] [2100/6250] eta: 0:11:19 lr: 0.000004 grad: 0.1959 (0.2450) loss: 0.6491 (0.6564) time: 0.1414 data: 0.0544 max mem: 9377 +Train: [89] [2200/6250] eta: 0:11:01 lr: 0.000004 grad: 0.2002 (0.2451) loss: 0.6458 (0.6562) time: 0.1507 data: 0.0656 max mem: 9377 +Train: [89] [2300/6250] eta: 0:10:45 lr: 0.000004 grad: 0.2004 (0.2449) loss: 0.6604 (0.6563) time: 0.1598 data: 0.0727 max mem: 9377 +Train: [89] [2400/6250] eta: 0:10:26 lr: 0.000004 grad: 0.1966 (0.2450) loss: 0.6543 (0.6563) time: 0.1317 data: 0.0434 max mem: 9377 +Train: [89] [2500/6250] eta: 0:10:08 lr: 0.000004 grad: 0.1953 (0.2449) loss: 0.6521 (0.6564) time: 0.1550 data: 0.0668 max mem: 9377 +Train: [89] [2600/6250] eta: 0:09:51 lr: 0.000004 grad: 0.1942 (0.2447) loss: 0.6681 (0.6566) time: 0.1555 data: 0.0681 max mem: 9377 +Train: [89] [2700/6250] eta: 0:09:35 lr: 0.000004 grad: 0.1979 (0.2457) loss: 0.6589 (0.6568) time: 0.1796 data: 0.0912 max mem: 9377 +Train: [89] [2800/6250] eta: 0:09:17 lr: 0.000004 grad: 0.1934 (0.2452) loss: 0.6701 (0.6571) time: 0.1436 data: 0.0590 max mem: 9377 +Train: [89] [2900/6250] eta: 0:09:02 lr: 0.000004 grad: 0.1899 (0.2450) loss: 0.6745 (0.6573) time: 0.1575 data: 0.0789 max mem: 9377 +Train: [89] [3000/6250] eta: 0:08:45 lr: 0.000004 grad: 0.1948 (0.2450) loss: 0.6657 (0.6575) time: 0.1551 data: 0.0632 max mem: 9377 +Train: [89] [3100/6250] eta: 0:08:29 lr: 0.000004 grad: 0.2012 (0.2445) loss: 0.6729 (0.6576) time: 0.1858 data: 0.1062 max mem: 9377 +Train: [89] [3200/6250] eta: 0:08:13 lr: 0.000004 grad: 0.1942 (0.2442) loss: 0.6702 (0.6578) time: 0.1630 data: 0.0769 max mem: 9377 +Train: [89] [3300/6250] eta: 0:07:56 lr: 0.000004 grad: 0.1951 (0.2437) loss: 0.6508 (0.6578) time: 0.1455 data: 0.0562 max mem: 9377 +Train: [89] [3400/6250] eta: 0:07:40 lr: 0.000004 grad: 0.2045 (0.2438) loss: 0.6661 (0.6580) time: 0.1608 data: 0.0724 max mem: 9377 +Train: [89] [3500/6250] eta: 0:07:23 lr: 0.000004 grad: 0.1955 (0.2437) loss: 0.6510 (0.6580) time: 0.1274 data: 0.0231 max mem: 9377 +Train: [89] [3600/6250] eta: 0:07:05 lr: 0.000004 grad: 0.2035 (0.2441) loss: 0.6560 (0.6580) time: 0.1302 data: 0.0268 max mem: 9377 +Train: [89] [3700/6250] eta: 0:06:48 lr: 0.000004 grad: 0.2017 (0.2446) loss: 0.6532 (0.6579) time: 0.1335 data: 0.0367 max mem: 9377 +Train: [89] [3800/6250] eta: 0:06:31 lr: 0.000004 grad: 0.1985 (0.2444) loss: 0.6472 (0.6577) time: 0.1426 data: 0.0603 max mem: 9377 +Train: [89] [3900/6250] eta: 0:06:15 lr: 0.000004 grad: 0.1996 (0.2437) loss: 0.6474 (0.6576) time: 0.1726 data: 0.0848 max mem: 9377 +Train: [89] [4000/6250] eta: 0:05:58 lr: 0.000004 grad: 0.2004 (0.2445) loss: 0.6706 (0.6578) time: 0.1659 data: 0.0874 max mem: 9377 +Train: [89] [4100/6250] eta: 0:05:42 lr: 0.000004 grad: 0.2002 (0.2447) loss: 0.6431 (0.6578) time: 0.1821 data: 0.1023 max mem: 9377 +Train: [89] [4200/6250] eta: 0:05:25 lr: 0.000004 grad: 0.1951 (0.2445) loss: 0.6534 (0.6579) time: 0.1574 data: 0.0791 max mem: 9377 +Train: [89] [4300/6250] eta: 0:05:09 lr: 0.000004 grad: 0.2021 (0.2443) loss: 0.6465 (0.6579) time: 0.1508 data: 0.0705 max mem: 9377 +Train: [89] [4400/6250] eta: 0:04:53 lr: 0.000004 grad: 0.1982 (0.2438) loss: 0.6496 (0.6578) time: 0.1382 data: 0.0533 max mem: 9377 +Train: [89] [4500/6250] eta: 0:04:37 lr: 0.000004 grad: 0.1909 (0.2439) loss: 0.6631 (0.6579) time: 0.1496 data: 0.0671 max mem: 9377 +Train: [89] [4600/6250] eta: 0:04:22 lr: 0.000004 grad: 0.1964 (0.2439) loss: 0.6608 (0.6579) time: 0.1812 data: 0.0998 max mem: 9377 +Train: [89] [4700/6250] eta: 0:04:06 lr: 0.000004 grad: 0.2000 (0.2451) loss: 0.6416 (0.6578) time: 0.1623 data: 0.0801 max mem: 9377 +Train: [89] [4800/6250] eta: 0:03:50 lr: 0.000004 grad: 0.1972 (0.2453) loss: 0.6632 (0.6577) time: 0.1268 data: 0.0365 max mem: 9377 +Train: [89] [4900/6250] eta: 0:03:34 lr: 0.000004 grad: 0.2021 (0.2455) loss: 0.6513 (0.6575) time: 0.1500 data: 0.0627 max mem: 9377 +Train: [89] [5000/6250] eta: 0:03:18 lr: 0.000004 grad: 0.1973 (0.2455) loss: 0.6480 (0.6574) time: 0.1708 data: 0.0871 max mem: 9377 +Train: [89] [5100/6250] eta: 0:03:02 lr: 0.000004 grad: 0.1969 (0.2458) loss: 0.6647 (0.6572) time: 0.1468 data: 0.0575 max mem: 9377 +Train: [89] [5200/6250] eta: 0:02:47 lr: 0.000003 grad: 0.1954 (0.2451) loss: 0.6488 (0.6572) time: 0.1591 data: 0.0687 max mem: 9377 +Train: [89] [5300/6250] eta: 0:02:31 lr: 0.000003 grad: 0.1987 (0.2451) loss: 0.6533 (0.6572) time: 0.1472 data: 0.0584 max mem: 9377 +Train: [89] [5400/6250] eta: 0:02:15 lr: 0.000003 grad: 0.1935 (0.2448) loss: 0.6583 (0.6572) time: 0.1751 data: 0.0869 max mem: 9377 +Train: [89] [5500/6250] eta: 0:01:59 lr: 0.000003 grad: 0.1991 (0.2449) loss: 0.6552 (0.6572) time: 0.1612 data: 0.0721 max mem: 9377 +Train: [89] [5600/6250] eta: 0:01:43 lr: 0.000003 grad: 0.1925 (0.2455) loss: 0.6606 (0.6573) time: 0.1644 data: 0.0748 max mem: 9377 +Train: [89] [5700/6250] eta: 0:01:27 lr: 0.000003 grad: 0.1966 (0.2454) loss: 0.6595 (0.6573) time: 0.1365 data: 0.0462 max mem: 9377 +Train: [89] [5800/6250] eta: 0:01:11 lr: 0.000003 grad: 0.1979 (0.2454) loss: 0.6629 (0.6574) time: 0.1433 data: 0.0552 max mem: 9377 +Train: [89] [5900/6250] eta: 0:00:55 lr: 0.000003 grad: 0.2014 (0.2453) loss: 0.6447 (0.6574) time: 0.1478 data: 0.0523 max mem: 9377 +Train: [89] [6000/6250] eta: 0:00:39 lr: 0.000003 grad: 0.1957 (0.2450) loss: 0.6613 (0.6575) time: 0.1622 data: 0.0810 max mem: 9377 +Train: [89] [6100/6250] eta: 0:00:23 lr: 0.000003 grad: 0.1960 (0.2454) loss: 0.6655 (0.6575) time: 0.1525 data: 0.0696 max mem: 9377 +Train: [89] [6200/6250] eta: 0:00:07 lr: 0.000003 grad: 0.1917 (0.2453) loss: 0.6643 (0.6577) time: 0.1358 data: 0.0419 max mem: 9377 +Train: [89] [6249/6250] eta: 0:00:00 lr: 0.000003 grad: 0.1894 (0.2452) loss: 0.6799 (0.6578) time: 0.1452 data: 0.0566 max mem: 9377 +Train: [89] Total time: 0:16:33 (0.1590 s / it) +Averaged stats: lr: 0.000003 grad: 0.1894 (0.2452) loss: 0.6799 (0.6578) +Eval (hcp-train-subset): [89] [ 0/62] eta: 0:05:23 loss: 0.8983 (0.8983) time: 5.2248 data: 5.1853 max mem: 9377 +Eval (hcp-train-subset): [89] [61/62] eta: 0:00:00 loss: 0.9134 (0.9132) time: 0.1312 data: 0.1047 max mem: 9377 +Eval (hcp-train-subset): [89] Total time: 0:00:14 (0.2282 s / it) +Averaged stats (hcp-train-subset): loss: 0.9134 (0.9132) +Making plots (hcp-train-subset): example=62 +Eval (hcp-val): [89] [ 0/62] eta: 0:04:37 loss: 0.9161 (0.9161) time: 4.4749 data: 4.3978 max mem: 9377 +Eval (hcp-val): [89] [61/62] eta: 0:00:00 loss: 0.9101 (0.9113) time: 0.1134 data: 0.0883 max mem: 9377 +Eval (hcp-val): [89] Total time: 0:00:14 (0.2411 s / it) +Averaged stats (hcp-val): loss: 0.9101 (0.9113) +Making plots (hcp-val): example=26 +Eval (nsd-val): [89] [ 0/62] eta: 0:06:31 loss: 0.9142 (0.9142) time: 6.3218 data: 6.2909 max mem: 9377 +Eval (nsd-val): [89] [61/62] eta: 0:00:00 loss: 0.9126 (0.9155) time: 0.1258 data: 0.0989 max mem: 9377 +Eval (nsd-val): [89] Total time: 0:00:14 (0.2265 s / it) +Averaged stats (nsd-val): loss: 0.9126 (0.9155) +Making plots (nsd-val): example=61 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-00089.pth +Train: [90] [ 0/6250] eta: 8:38:35 lr: 0.000003 grad: 0.3205 (0.3205) loss: 0.7828 (0.7828) time: 4.9785 data: 4.7249 max mem: 9377 +Train: [90] [ 100/6250] eta: 0:22:43 lr: 0.000003 grad: 0.2053 (0.2700) loss: 0.6716 (0.6841) time: 0.1883 data: 0.0881 max mem: 9377 +Train: [90] [ 200/6250] eta: 0:18:57 lr: 0.000003 grad: 0.2030 (0.2695) loss: 0.6803 (0.6746) time: 0.1593 data: 0.0522 max mem: 9377 +Train: [90] [ 300/6250] eta: 0:17:34 lr: 0.000003 grad: 0.2009 (0.2739) loss: 0.6644 (0.6688) time: 0.1528 data: 0.0615 max mem: 9377 +Train: [90] [ 400/6250] eta: 0:16:32 lr: 0.000003 grad: 0.2023 (0.2661) loss: 0.6374 (0.6632) time: 0.1401 data: 0.0463 max mem: 9377 +Train: [90] [ 500/6250] eta: 0:15:54 lr: 0.000003 grad: 0.1981 (0.2665) loss: 0.6474 (0.6599) time: 0.1440 data: 0.0339 max mem: 9377 +Train: [90] [ 600/6250] eta: 0:15:20 lr: 0.000003 grad: 0.1984 (0.2659) loss: 0.6419 (0.6582) time: 0.1511 data: 0.0582 max mem: 9377 +Train: [90] [ 700/6250] eta: 0:14:55 lr: 0.000003 grad: 0.1951 (0.2595) loss: 0.6586 (0.6579) time: 0.1368 data: 0.0417 max mem: 9377 +Train: [90] [ 800/6250] eta: 0:14:35 lr: 0.000003 grad: 0.1894 (0.2573) loss: 0.6729 (0.6583) time: 0.1688 data: 0.0792 max mem: 9377 +Train: [90] [ 900/6250] eta: 0:14:20 lr: 0.000003 grad: 0.1925 (0.2530) loss: 0.6691 (0.6592) time: 0.2257 data: 0.1228 max mem: 9377 +Train: [90] [1000/6250] eta: 0:14:02 lr: 0.000003 grad: 0.2046 (0.2519) loss: 0.6619 (0.6596) time: 0.1500 data: 0.0724 max mem: 9377 +Train: [90] [1100/6250] eta: 0:13:44 lr: 0.000003 grad: 0.1916 (0.2508) loss: 0.6680 (0.6600) time: 0.1595 data: 0.0676 max mem: 9377 +Train: [90] [1200/6250] eta: 0:13:33 lr: 0.000003 grad: 0.1893 (0.2495) loss: 0.6730 (0.6602) time: 0.1481 data: 0.0604 max mem: 9377 +Train: [90] [1300/6250] eta: 0:13:09 lr: 0.000003 grad: 0.1922 (0.2493) loss: 0.6589 (0.6601) time: 0.1389 data: 0.0524 max mem: 9377 +Train: [90] [1400/6250] eta: 0:12:54 lr: 0.000003 grad: 0.1929 (0.2497) loss: 0.6617 (0.6603) time: 0.1668 data: 0.0835 max mem: 9377 +Train: [90] [1500/6250] eta: 0:12:38 lr: 0.000003 grad: 0.1955 (0.2493) loss: 0.6557 (0.6602) time: 0.1614 data: 0.0783 max mem: 9377 +Train: [90] [1600/6250] eta: 0:12:21 lr: 0.000003 grad: 0.1957 (0.2481) loss: 0.6728 (0.6602) time: 0.1626 data: 0.0621 max mem: 9377 +Train: [90] [1700/6250] eta: 0:12:02 lr: 0.000003 grad: 0.1967 (0.2481) loss: 0.6499 (0.6600) time: 0.1432 data: 0.0477 max mem: 9377 +Train: [90] [1800/6250] eta: 0:11:42 lr: 0.000003 grad: 0.1935 (0.2476) loss: 0.6547 (0.6601) time: 0.1340 data: 0.0469 max mem: 9377 +Train: [90] [1900/6250] eta: 0:11:24 lr: 0.000003 grad: 0.2006 (0.2468) loss: 0.6654 (0.6604) time: 0.1493 data: 0.0543 max mem: 9377 +Train: [90] [2000/6250] eta: 0:11:07 lr: 0.000003 grad: 0.1900 (0.2457) loss: 0.6622 (0.6609) time: 0.1323 data: 0.0368 max mem: 9377 +Train: [90] [2100/6250] eta: 0:10:49 lr: 0.000003 grad: 0.1964 (0.2451) loss: 0.6684 (0.6612) time: 0.1522 data: 0.0681 max mem: 9377 +Train: [90] [2200/6250] eta: 0:10:30 lr: 0.000003 grad: 0.1935 (0.2443) loss: 0.6648 (0.6613) time: 0.1543 data: 0.0743 max mem: 9377 +Train: [90] [2300/6250] eta: 0:10:13 lr: 0.000003 grad: 0.1948 (0.2430) loss: 0.6550 (0.6617) time: 0.1392 data: 0.0528 max mem: 9377 +Train: [90] [2400/6250] eta: 0:09:56 lr: 0.000003 grad: 0.1951 (0.2421) loss: 0.6638 (0.6619) time: 0.1496 data: 0.0607 max mem: 9377 +Train: [90] [2500/6250] eta: 0:09:39 lr: 0.000003 grad: 0.1938 (0.2424) loss: 0.6649 (0.6619) time: 0.1498 data: 0.0649 max mem: 9377 +Train: [90] [2600/6250] eta: 0:09:22 lr: 0.000003 grad: 0.1970 (0.2422) loss: 0.6547 (0.6619) time: 0.1227 data: 0.0246 max mem: 9377 +Train: [90] [2700/6250] eta: 0:09:05 lr: 0.000003 grad: 0.1975 (0.2413) loss: 0.6421 (0.6617) time: 0.1433 data: 0.0508 max mem: 9377 +Train: [90] [2800/6250] eta: 0:08:49 lr: 0.000003 grad: 0.2024 (0.2413) loss: 0.6396 (0.6615) time: 0.1618 data: 0.0864 max mem: 9377 +Train: [90] [2900/6250] eta: 0:08:33 lr: 0.000003 grad: 0.1957 (0.2412) loss: 0.6648 (0.6615) time: 0.1444 data: 0.0513 max mem: 9377 +Train: [90] [3000/6250] eta: 0:08:18 lr: 0.000003 grad: 0.1929 (0.2416) loss: 0.6623 (0.6616) time: 0.1374 data: 0.0523 max mem: 9377 +Train: [90] [3100/6250] eta: 0:08:03 lr: 0.000003 grad: 0.1980 (0.2417) loss: 0.6555 (0.6615) time: 0.1544 data: 0.0713 max mem: 9377 +Train: [90] [3200/6250] eta: 0:07:47 lr: 0.000003 grad: 0.1949 (0.2421) loss: 0.6469 (0.6614) time: 0.0973 data: 0.0152 max mem: 9377 +Train: [90] [3300/6250] eta: 0:07:32 lr: 0.000003 grad: 0.1924 (0.2413) loss: 0.6745 (0.6614) time: 0.1474 data: 0.0564 max mem: 9377 +Train: [90] [3400/6250] eta: 0:07:16 lr: 0.000003 grad: 0.1972 (0.2423) loss: 0.6564 (0.6613) time: 0.1573 data: 0.0759 max mem: 9377 +Train: [90] [3500/6250] eta: 0:07:00 lr: 0.000003 grad: 0.1893 (0.2423) loss: 0.6651 (0.6612) time: 0.1446 data: 0.0515 max mem: 9377 +Train: [90] [3600/6250] eta: 0:06:45 lr: 0.000003 grad: 0.1956 (0.2425) loss: 0.6541 (0.6610) time: 0.1515 data: 0.0595 max mem: 9377 +Train: [90] [3700/6250] eta: 0:06:29 lr: 0.000003 grad: 0.1960 (0.2429) loss: 0.6581 (0.6609) time: 0.1041 data: 0.0158 max mem: 9377 +Train: [90] [3800/6250] eta: 0:06:13 lr: 0.000003 grad: 0.1991 (0.2437) loss: 0.6609 (0.6608) time: 0.1521 data: 0.0615 max mem: 9377 +Train: [90] [3900/6250] eta: 0:05:57 lr: 0.000003 grad: 0.2028 (0.2430) loss: 0.6555 (0.6607) time: 0.1327 data: 0.0491 max mem: 9377 +Train: [90] [4000/6250] eta: 0:05:42 lr: 0.000003 grad: 0.1945 (0.2426) loss: 0.6526 (0.6607) time: 0.1382 data: 0.0543 max mem: 9377 +Train: [90] [4100/6250] eta: 0:05:26 lr: 0.000003 grad: 0.1934 (0.2420) loss: 0.6612 (0.6607) time: 0.1253 data: 0.0361 max mem: 9377 +Train: [90] [4200/6250] eta: 0:05:10 lr: 0.000003 grad: 0.1974 (0.2420) loss: 0.6645 (0.6608) time: 0.1324 data: 0.0495 max mem: 9377 +Train: [90] [4300/6250] eta: 0:04:55 lr: 0.000003 grad: 0.2009 (0.2419) loss: 0.6598 (0.6610) time: 0.1357 data: 0.0475 max mem: 9377 +Train: [90] [4400/6250] eta: 0:04:39 lr: 0.000003 grad: 0.1963 (0.2414) loss: 0.6630 (0.6610) time: 0.1217 data: 0.0314 max mem: 9377 +Train: [90] [4500/6250] eta: 0:04:24 lr: 0.000003 grad: 0.2029 (0.2416) loss: 0.6688 (0.6609) time: 0.1469 data: 0.0614 max mem: 9377 +Train: [90] [4600/6250] eta: 0:04:09 lr: 0.000003 grad: 0.1893 (0.2420) loss: 0.6731 (0.6610) time: 0.1502 data: 0.0631 max mem: 9377 +Train: [90] [4700/6250] eta: 0:03:54 lr: 0.000003 grad: 0.1991 (0.2420) loss: 0.6665 (0.6611) time: 0.1530 data: 0.0617 max mem: 9377 +Train: [90] [4800/6250] eta: 0:03:39 lr: 0.000003 grad: 0.1900 (0.2419) loss: 0.6616 (0.6613) time: 0.1546 data: 0.0711 max mem: 9377 +Train: [90] [4900/6250] eta: 0:03:24 lr: 0.000003 grad: 0.1914 (0.2418) loss: 0.6586 (0.6614) time: 0.1467 data: 0.0704 max mem: 9377 +Train: [90] [5000/6250] eta: 0:03:09 lr: 0.000003 grad: 0.1958 (0.2415) loss: 0.6660 (0.6615) time: 0.1411 data: 0.0579 max mem: 9377 +Train: [90] [5100/6250] eta: 0:02:54 lr: 0.000003 grad: 0.1957 (0.2415) loss: 0.6666 (0.6616) time: 0.1435 data: 0.0667 max mem: 9377 +Train: [90] [5200/6250] eta: 0:02:39 lr: 0.000003 grad: 0.1981 (0.2416) loss: 0.6608 (0.6616) time: 0.1640 data: 0.0797 max mem: 9377 +Train: [90] [5300/6250] eta: 0:02:23 lr: 0.000003 grad: 0.1929 (0.2418) loss: 0.6617 (0.6617) time: 0.1506 data: 0.0710 max mem: 9377 +Train: [90] [5400/6250] eta: 0:02:08 lr: 0.000003 grad: 0.1921 (0.2418) loss: 0.6620 (0.6618) time: 0.1732 data: 0.0824 max mem: 9377 +Train: [90] [5500/6250] eta: 0:01:53 lr: 0.000003 grad: 0.2064 (0.2413) loss: 0.6492 (0.6617) time: 0.1592 data: 0.0702 max mem: 9377 +Train: [90] [5600/6250] eta: 0:01:38 lr: 0.000003 grad: 0.1956 (0.2410) loss: 0.6652 (0.6616) time: 0.1087 data: 0.0122 max mem: 9377 +Train: [90] [5700/6250] eta: 0:01:23 lr: 0.000003 grad: 0.1945 (0.2411) loss: 0.6592 (0.6615) time: 0.1681 data: 0.0764 max mem: 9377 +Train: [90] [5800/6250] eta: 0:01:08 lr: 0.000003 grad: 0.2029 (0.2407) loss: 0.6677 (0.6614) time: 0.1681 data: 0.0815 max mem: 9377 +Train: [90] [5900/6250] eta: 0:00:52 lr: 0.000003 grad: 0.1991 (0.2406) loss: 0.6484 (0.6612) time: 0.1479 data: 0.0584 max mem: 9377 +Train: [90] [6000/6250] eta: 0:00:37 lr: 0.000003 grad: 0.1962 (0.2411) loss: 0.6430 (0.6610) time: 0.1361 data: 0.0470 max mem: 9377 +Train: [90] [6100/6250] eta: 0:00:22 lr: 0.000003 grad: 0.2028 (0.2409) loss: 0.6588 (0.6610) time: 0.1246 data: 0.0433 max mem: 9377 +Train: [90] [6200/6250] eta: 0:00:07 lr: 0.000003 grad: 0.1978 (0.2411) loss: 0.6567 (0.6609) time: 0.1460 data: 0.0600 max mem: 9377 +Train: [90] [6249/6250] eta: 0:00:00 lr: 0.000003 grad: 0.2000 (0.2410) loss: 0.6593 (0.6609) time: 0.1286 data: 0.0400 max mem: 9377 +Train: [90] Total time: 0:15:48 (0.1518 s / it) +Averaged stats: lr: 0.000003 grad: 0.2000 (0.2410) loss: 0.6593 (0.6609) +Eval (hcp-train-subset): [90] [ 0/62] eta: 0:05:57 loss: 0.8957 (0.8957) time: 5.7736 data: 5.7418 max mem: 9377 +Eval (hcp-train-subset): [90] [61/62] eta: 0:00:00 loss: 0.9129 (0.9122) time: 0.1397 data: 0.1140 max mem: 9377 +Eval (hcp-train-subset): [90] Total time: 0:00:13 (0.2249 s / it) +Averaged stats (hcp-train-subset): loss: 0.9129 (0.9122) +Eval (hcp-val): [90] [ 0/62] eta: 0:04:35 loss: 0.9168 (0.9168) time: 4.4466 data: 4.3707 max mem: 9377 +Eval (hcp-val): [90] [61/62] eta: 0:00:00 loss: 0.9095 (0.9115) time: 0.1305 data: 0.1052 max mem: 9377 +Eval (hcp-val): [90] Total time: 0:00:14 (0.2341 s / it) +Averaged stats (hcp-val): loss: 0.9095 (0.9115) +Eval (nsd-val): [90] [ 0/62] eta: 0:06:01 loss: 0.9133 (0.9133) time: 5.8328 data: 5.8022 max mem: 9377 +Eval (nsd-val): [90] [61/62] eta: 0:00:00 loss: 0.9139 (0.9167) time: 0.1279 data: 0.1026 max mem: 9377 +Eval (nsd-val): [90] Total time: 0:00:14 (0.2294 s / it) +Averaged stats (nsd-val): loss: 0.9139 (0.9167) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [91] [ 0/6250] eta: 9:48:40 lr: 0.000003 grad: 0.2378 (0.2378) loss: 0.6859 (0.6859) time: 5.6512 data: 5.4052 max mem: 9377 +Train: [91] [ 100/6250] eta: 0:21:36 lr: 0.000003 grad: 0.2076 (0.2622) loss: 0.6421 (0.6826) time: 0.1526 data: 0.0541 max mem: 9377 +Train: [91] [ 200/6250] eta: 0:18:49 lr: 0.000003 grad: 0.2075 (0.2443) loss: 0.6610 (0.6699) time: 0.1643 data: 0.0599 max mem: 9377 +Train: [91] [ 300/6250] eta: 0:17:24 lr: 0.000003 grad: 0.1958 (0.2473) loss: 0.6574 (0.6665) time: 0.1591 data: 0.0573 max mem: 9377 +Train: [91] [ 400/6250] eta: 0:16:30 lr: 0.000003 grad: 0.1967 (0.2441) loss: 0.6838 (0.6684) time: 0.1476 data: 0.0582 max mem: 9377 +Train: [91] [ 500/6250] eta: 0:15:56 lr: 0.000003 grad: 0.1913 (0.2395) loss: 0.6694 (0.6694) time: 0.1608 data: 0.0688 max mem: 9377 +Train: [91] [ 600/6250] eta: 0:15:28 lr: 0.000003 grad: 0.1959 (0.2395) loss: 0.6659 (0.6696) time: 0.1568 data: 0.0559 max mem: 9377 +Train: [91] [ 700/6250] eta: 0:14:58 lr: 0.000003 grad: 0.1927 (0.2373) loss: 0.6584 (0.6686) time: 0.1370 data: 0.0491 max mem: 9377 +Train: [91] [ 800/6250] eta: 0:14:34 lr: 0.000003 grad: 0.2002 (0.2366) loss: 0.6533 (0.6677) time: 0.1521 data: 0.0665 max mem: 9377 +Train: [91] [ 900/6250] eta: 0:14:10 lr: 0.000003 grad: 0.1903 (0.2371) loss: 0.6716 (0.6674) time: 0.1539 data: 0.0541 max mem: 9377 +Train: [91] [1000/6250] eta: 0:13:43 lr: 0.000003 grad: 0.1916 (0.2378) loss: 0.6577 (0.6673) time: 0.1487 data: 0.0568 max mem: 9377 +Train: [91] [1100/6250] eta: 0:13:29 lr: 0.000003 grad: 0.1966 (0.2367) loss: 0.6676 (0.6674) time: 0.1699 data: 0.0808 max mem: 9377 +Train: [91] [1200/6250] eta: 0:13:13 lr: 0.000003 grad: 0.1966 (0.2354) loss: 0.6706 (0.6673) time: 0.1640 data: 0.0764 max mem: 9377 +Train: [91] [1300/6250] eta: 0:12:54 lr: 0.000003 grad: 0.1961 (0.2354) loss: 0.6582 (0.6668) time: 0.1351 data: 0.0414 max mem: 9377 +Train: [91] [1400/6250] eta: 0:12:41 lr: 0.000003 grad: 0.1987 (0.2365) loss: 0.6552 (0.6663) time: 0.1705 data: 0.0830 max mem: 9377 +Train: [91] [1500/6250] eta: 0:12:25 lr: 0.000003 grad: 0.1934 (0.2356) loss: 0.6542 (0.6655) time: 0.1514 data: 0.0673 max mem: 9377 +Train: [91] [1600/6250] eta: 0:12:08 lr: 0.000003 grad: 0.2051 (0.2371) loss: 0.6622 (0.6648) time: 0.1465 data: 0.0510 max mem: 9377 +Train: [91] [1700/6250] eta: 0:11:51 lr: 0.000003 grad: 0.2018 (0.2378) loss: 0.6615 (0.6643) time: 0.1484 data: 0.0608 max mem: 9377 +Train: [91] [1800/6250] eta: 0:11:33 lr: 0.000003 grad: 0.1966 (0.2390) loss: 0.6619 (0.6641) time: 0.1253 data: 0.0381 max mem: 9377 +Train: [91] [1900/6250] eta: 0:11:17 lr: 0.000003 grad: 0.1968 (0.2387) loss: 0.6664 (0.6639) time: 0.1593 data: 0.0774 max mem: 9377 +Train: [91] [2000/6250] eta: 0:11:02 lr: 0.000003 grad: 0.2009 (0.2396) loss: 0.6614 (0.6634) time: 0.1325 data: 0.0393 max mem: 9377 +Train: [91] [2100/6250] eta: 0:10:46 lr: 0.000003 grad: 0.1919 (0.2390) loss: 0.6602 (0.6633) time: 0.1640 data: 0.0773 max mem: 9377 +Train: [91] [2200/6250] eta: 0:10:30 lr: 0.000003 grad: 0.1975 (0.2386) loss: 0.6676 (0.6632) time: 0.1538 data: 0.0657 max mem: 9377 +Train: [91] [2300/6250] eta: 0:10:14 lr: 0.000003 grad: 0.1946 (0.2388) loss: 0.6528 (0.6631) time: 0.1642 data: 0.0839 max mem: 9377 +Train: [91] [2400/6250] eta: 0:09:56 lr: 0.000003 grad: 0.1940 (0.2398) loss: 0.6582 (0.6630) time: 0.1204 data: 0.0301 max mem: 9377 +Train: [91] [2500/6250] eta: 0:09:40 lr: 0.000003 grad: 0.1939 (0.2398) loss: 0.6667 (0.6629) time: 0.1217 data: 0.0432 max mem: 9377 +Train: [91] [2600/6250] eta: 0:09:22 lr: 0.000003 grad: 0.1980 (0.2402) loss: 0.6530 (0.6629) time: 0.1351 data: 0.0426 max mem: 9377 +Train: [91] [2700/6250] eta: 0:09:04 lr: 0.000002 grad: 0.1916 (0.2419) loss: 0.6492 (0.6629) time: 0.1367 data: 0.0512 max mem: 9377 +Train: [91] [2800/6250] eta: 0:08:48 lr: 0.000002 grad: 0.1911 (0.2425) loss: 0.6704 (0.6630) time: 0.1336 data: 0.0489 max mem: 9377 +Train: [91] [2900/6250] eta: 0:08:32 lr: 0.000002 grad: 0.1995 (0.2429) loss: 0.6707 (0.6631) time: 0.1819 data: 0.1019 max mem: 9377 +Train: [91] [3000/6250] eta: 0:08:18 lr: 0.000002 grad: 0.1974 (0.2441) loss: 0.6659 (0.6630) time: 0.1079 data: 0.0186 max mem: 9377 +Train: [91] [3100/6250] eta: 0:08:04 lr: 0.000002 grad: 0.1940 (0.2432) loss: 0.6552 (0.6629) time: 0.1687 data: 0.0900 max mem: 9377 +Train: [91] [3200/6250] eta: 0:07:48 lr: 0.000002 grad: 0.1894 (0.2430) loss: 0.6661 (0.6629) time: 0.1441 data: 0.0573 max mem: 9377 +Train: [91] [3300/6250] eta: 0:07:33 lr: 0.000002 grad: 0.1934 (0.2436) loss: 0.6699 (0.6628) time: 0.1648 data: 0.0861 max mem: 9377 +Train: [91] [3400/6250] eta: 0:07:18 lr: 0.000002 grad: 0.2050 (0.2431) loss: 0.6643 (0.6629) time: 0.1367 data: 0.0399 max mem: 9377 +Train: [91] [3500/6250] eta: 0:07:02 lr: 0.000002 grad: 0.1983 (0.2419) loss: 0.6466 (0.6630) time: 0.1577 data: 0.0676 max mem: 9377 +Train: [91] [3600/6250] eta: 0:06:46 lr: 0.000002 grad: 0.1946 (0.2428) loss: 0.6551 (0.6630) time: 0.1397 data: 0.0398 max mem: 9377 +Train: [91] [3700/6250] eta: 0:06:30 lr: 0.000002 grad: 0.1927 (0.2421) loss: 0.6759 (0.6629) time: 0.1444 data: 0.0487 max mem: 9377 +Train: [91] [3800/6250] eta: 0:06:14 lr: 0.000002 grad: 0.1906 (0.2424) loss: 0.6755 (0.6631) time: 0.1325 data: 0.0475 max mem: 9377 +Train: [91] [3900/6250] eta: 0:05:59 lr: 0.000002 grad: 0.1901 (0.2425) loss: 0.6587 (0.6632) time: 0.1390 data: 0.0406 max mem: 9377 +Train: [91] [4000/6250] eta: 0:05:43 lr: 0.000002 grad: 0.2037 (0.2420) loss: 0.6569 (0.6633) time: 0.1576 data: 0.0699 max mem: 9377 +Train: [91] [4100/6250] eta: 0:05:28 lr: 0.000002 grad: 0.1969 (0.2429) loss: 0.6680 (0.6632) time: 0.1514 data: 0.0554 max mem: 9377 +Train: [91] [4200/6250] eta: 0:05:14 lr: 0.000002 grad: 0.1956 (0.2427) loss: 0.6595 (0.6632) time: 0.1174 data: 0.0002 max mem: 9377 +Train: [91] [4300/6250] eta: 0:05:01 lr: 0.000002 grad: 0.2092 (0.2432) loss: 0.6554 (0.6631) time: 0.1835 data: 0.0708 max mem: 9377 +Train: [91] [4400/6250] eta: 0:04:47 lr: 0.000002 grad: 0.1917 (0.2431) loss: 0.6419 (0.6629) time: 0.3364 data: 0.2525 max mem: 9377 +Train: [91] [4500/6250] eta: 0:04:32 lr: 0.000002 grad: 0.1983 (0.2435) loss: 0.6675 (0.6628) time: 0.1345 data: 0.0389 max mem: 9377 +Train: [91] [4600/6250] eta: 0:04:17 lr: 0.000002 grad: 0.1959 (0.2436) loss: 0.6685 (0.6627) time: 0.1543 data: 0.0667 max mem: 9377 +Train: [91] [4700/6250] eta: 0:04:01 lr: 0.000002 grad: 0.1951 (0.2435) loss: 0.6623 (0.6627) time: 0.1519 data: 0.0699 max mem: 9377 +Train: [91] [4800/6250] eta: 0:03:46 lr: 0.000002 grad: 0.1936 (0.2441) loss: 0.6433 (0.6626) time: 0.1740 data: 0.0703 max mem: 9377 +Train: [91] [4900/6250] eta: 0:03:31 lr: 0.000002 grad: 0.1941 (0.2437) loss: 0.6579 (0.6625) time: 0.1149 data: 0.0003 max mem: 9377 +Train: [91] [5000/6250] eta: 0:03:18 lr: 0.000002 grad: 0.1912 (0.2437) loss: 0.6524 (0.6625) time: 0.3201 data: 0.2331 max mem: 9377 +Train: [91] [5100/6250] eta: 0:03:02 lr: 0.000002 grad: 0.1935 (0.2433) loss: 0.6597 (0.6624) time: 0.1634 data: 0.0813 max mem: 9377 +Train: [91] [5200/6250] eta: 0:02:46 lr: 0.000002 grad: 0.1933 (0.2443) loss: 0.6570 (0.6624) time: 0.1521 data: 0.0624 max mem: 9377 +Train: [91] [5300/6250] eta: 0:02:30 lr: 0.000002 grad: 0.1987 (0.2450) loss: 0.6472 (0.6622) time: 0.1482 data: 0.0665 max mem: 9377 +Train: [91] [5400/6250] eta: 0:02:14 lr: 0.000002 grad: 0.1953 (0.2450) loss: 0.6524 (0.6621) time: 0.1423 data: 0.0534 max mem: 9377 +Train: [91] [5500/6250] eta: 0:01:59 lr: 0.000002 grad: 0.1971 (0.2458) loss: 0.6400 (0.6618) time: 0.1441 data: 0.0457 max mem: 9377 +Train: [91] [5600/6250] eta: 0:01:43 lr: 0.000002 grad: 0.1961 (0.2458) loss: 0.6483 (0.6617) time: 0.1620 data: 0.0569 max mem: 9377 +Train: [91] [5700/6250] eta: 0:01:27 lr: 0.000002 grad: 0.1985 (0.2457) loss: 0.6467 (0.6615) time: 0.1446 data: 0.0510 max mem: 9377 +Train: [91] [5800/6250] eta: 0:01:11 lr: 0.000002 grad: 0.1963 (0.2463) loss: 0.6430 (0.6614) time: 0.1498 data: 0.0566 max mem: 9377 +Train: [91] [5900/6250] eta: 0:00:55 lr: 0.000002 grad: 0.1986 (0.2459) loss: 0.6539 (0.6613) time: 0.1591 data: 0.0604 max mem: 9377 +Train: [91] [6000/6250] eta: 0:00:39 lr: 0.000002 grad: 0.1900 (0.2455) loss: 0.6531 (0.6613) time: 0.1154 data: 0.0259 max mem: 9377 +Train: [91] [6100/6250] eta: 0:00:23 lr: 0.000002 grad: 0.1920 (0.2452) loss: 0.6690 (0.6612) time: 0.1393 data: 0.0488 max mem: 9377 +Train: [91] [6200/6250] eta: 0:00:07 lr: 0.000002 grad: 0.1917 (0.2452) loss: 0.6652 (0.6612) time: 0.1445 data: 0.0627 max mem: 9377 +Train: [91] [6249/6250] eta: 0:00:00 lr: 0.000002 grad: 0.1903 (0.2450) loss: 0.6845 (0.6612) time: 0.1228 data: 0.0171 max mem: 9377 +Train: [91] Total time: 0:16:31 (0.1586 s / it) +Averaged stats: lr: 0.000002 grad: 0.1903 (0.2450) loss: 0.6845 (0.6612) +Eval (hcp-train-subset): [91] [ 0/62] eta: 0:06:13 loss: 0.8977 (0.8977) time: 6.0237 data: 5.9933 max mem: 9377 +Eval (hcp-train-subset): [91] [61/62] eta: 0:00:00 loss: 0.9116 (0.9130) time: 0.1354 data: 0.1101 max mem: 9377 +Eval (hcp-train-subset): [91] Total time: 0:00:14 (0.2346 s / it) +Averaged stats (hcp-train-subset): loss: 0.9116 (0.9130) +Eval (hcp-val): [91] [ 0/62] eta: 0:05:58 loss: 0.9139 (0.9139) time: 5.7761 data: 5.7443 max mem: 9377 +Eval (hcp-val): [91] [61/62] eta: 0:00:00 loss: 0.9123 (0.9122) time: 0.1350 data: 0.1081 max mem: 9377 +Eval (hcp-val): [91] Total time: 0:00:14 (0.2351 s / it) +Averaged stats (hcp-val): loss: 0.9123 (0.9122) +Eval (nsd-val): [91] [ 0/62] eta: 0:05:40 loss: 0.9276 (0.9276) time: 5.4873 data: 5.4570 max mem: 9377 +Eval (nsd-val): [91] [61/62] eta: 0:00:00 loss: 0.9163 (0.9195) time: 0.1352 data: 0.1082 max mem: 9377 +Eval (nsd-val): [91] Total time: 0:00:14 (0.2307 s / it) +Averaged stats (nsd-val): loss: 0.9163 (0.9195) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [92] [ 0/6250] eta: 9:24:54 lr: 0.000002 grad: 0.2258 (0.2258) loss: 0.6890 (0.6890) time: 5.4231 data: 5.0687 max mem: 9377 +Train: [92] [ 100/6250] eta: 0:22:28 lr: 0.000002 grad: 0.2129 (0.2263) loss: 0.6530 (0.6736) time: 0.1740 data: 0.0703 max mem: 9377 +Train: [92] [ 200/6250] eta: 0:19:19 lr: 0.000002 grad: 0.2126 (0.2312) loss: 0.6494 (0.6584) time: 0.1204 data: 0.0166 max mem: 9377 +Train: [92] [ 300/6250] eta: 0:17:52 lr: 0.000002 grad: 0.2006 (0.2353) loss: 0.6713 (0.6572) time: 0.1452 data: 0.0515 max mem: 9377 +Train: [92] [ 400/6250] eta: 0:16:46 lr: 0.000002 grad: 0.1998 (0.2453) loss: 0.6470 (0.6573) time: 0.1522 data: 0.0526 max mem: 9377 +Train: [92] [ 500/6250] eta: 0:16:03 lr: 0.000002 grad: 0.1999 (0.2570) loss: 0.6730 (0.6582) time: 0.1361 data: 0.0421 max mem: 9377 +Train: [92] [ 600/6250] eta: 0:15:33 lr: 0.000002 grad: 0.2007 (0.2656) loss: 0.6505 (0.6579) time: 0.1590 data: 0.0688 max mem: 9377 +Train: [92] [ 700/6250] eta: 0:15:09 lr: 0.000002 grad: 0.1996 (0.2685) loss: 0.6464 (0.6576) time: 0.1401 data: 0.0538 max mem: 9377 +Train: [92] [ 800/6250] eta: 0:14:48 lr: 0.000002 grad: 0.1956 (0.2696) loss: 0.6620 (0.6588) time: 0.1517 data: 0.0637 max mem: 9377 +Train: [92] [ 900/6250] eta: 0:14:29 lr: 0.000002 grad: 0.1957 (0.2674) loss: 0.6677 (0.6591) time: 0.1582 data: 0.0663 max mem: 9377 +Train: [92] [1000/6250] eta: 0:14:12 lr: 0.000002 grad: 0.1934 (0.2647) loss: 0.6619 (0.6593) time: 0.1918 data: 0.1108 max mem: 9377 +Train: [92] [1100/6250] eta: 0:13:55 lr: 0.000002 grad: 0.1956 (0.2630) loss: 0.6558 (0.6597) time: 0.1461 data: 0.0627 max mem: 9377 +Train: [92] [1200/6250] eta: 0:13:33 lr: 0.000002 grad: 0.1930 (0.2615) loss: 0.6541 (0.6596) time: 0.1463 data: 0.0638 max mem: 9377 +Train: [92] [1300/6250] eta: 0:13:18 lr: 0.000002 grad: 0.1944 (0.2637) loss: 0.6679 (0.6603) time: 0.1502 data: 0.0647 max mem: 9377 +Train: [92] [1400/6250] eta: 0:12:58 lr: 0.000002 grad: 0.1927 (0.2616) loss: 0.6601 (0.6606) time: 0.1429 data: 0.0554 max mem: 9377 +Train: [92] [1500/6250] eta: 0:12:42 lr: 0.000002 grad: 0.1954 (0.2613) loss: 0.6702 (0.6609) time: 0.1545 data: 0.0693 max mem: 9377 +Train: [92] [1600/6250] eta: 0:12:24 lr: 0.000002 grad: 0.1972 (0.2605) loss: 0.6628 (0.6611) time: 0.1450 data: 0.0573 max mem: 9377 +Train: [92] [1700/6250] eta: 0:12:05 lr: 0.000002 grad: 0.1934 (0.2588) loss: 0.6688 (0.6614) time: 0.1610 data: 0.0728 max mem: 9377 +Train: [92] [1800/6250] eta: 0:11:47 lr: 0.000002 grad: 0.2005 (0.2581) loss: 0.6517 (0.6614) time: 0.1640 data: 0.0775 max mem: 9377 +Train: [92] [1900/6250] eta: 0:11:27 lr: 0.000002 grad: 0.1924 (0.2589) loss: 0.6722 (0.6618) time: 0.1427 data: 0.0503 max mem: 9377 +Train: [92] [2000/6250] eta: 0:11:09 lr: 0.000002 grad: 0.1925 (0.2592) loss: 0.6634 (0.6620) time: 0.1492 data: 0.0586 max mem: 9377 +Train: [92] [2100/6250] eta: 0:10:51 lr: 0.000002 grad: 0.1954 (0.2579) loss: 0.6518 (0.6621) time: 0.1524 data: 0.0653 max mem: 9377 +Train: [92] [2200/6250] eta: 0:10:34 lr: 0.000002 grad: 0.2034 (0.2575) loss: 0.6648 (0.6625) time: 0.1487 data: 0.0667 max mem: 9377 +Train: [92] [2300/6250] eta: 0:10:17 lr: 0.000002 grad: 0.1934 (0.2579) loss: 0.6722 (0.6627) time: 0.1546 data: 0.0739 max mem: 9377 +Train: [92] [2400/6250] eta: 0:10:01 lr: 0.000002 grad: 0.1933 (0.2584) loss: 0.6703 (0.6625) time: 0.1595 data: 0.0715 max mem: 9377 +Train: [92] [2500/6250] eta: 0:09:44 lr: 0.000002 grad: 0.1915 (0.2570) loss: 0.6725 (0.6624) time: 0.1424 data: 0.0559 max mem: 9377 +Train: [92] [2600/6250] eta: 0:09:27 lr: 0.000002 grad: 0.1969 (0.2577) loss: 0.6584 (0.6622) time: 0.1522 data: 0.0614 max mem: 9377 +Train: [92] [2700/6250] eta: 0:09:11 lr: 0.000002 grad: 0.1973 (0.2568) loss: 0.6536 (0.6622) time: 0.1637 data: 0.0776 max mem: 9377 +Train: [92] [2800/6250] eta: 0:08:55 lr: 0.000002 grad: 0.1969 (0.2562) loss: 0.6459 (0.6621) time: 0.1515 data: 0.0641 max mem: 9377 +Train: [92] [2900/6250] eta: 0:08:39 lr: 0.000002 grad: 0.2020 (0.2576) loss: 0.6563 (0.6619) time: 0.1663 data: 0.0865 max mem: 9377 +Train: [92] [3000/6250] eta: 0:08:24 lr: 0.000002 grad: 0.1992 (0.2566) loss: 0.6592 (0.6619) time: 0.1455 data: 0.0687 max mem: 9377 +Train: [92] [3100/6250] eta: 0:08:08 lr: 0.000002 grad: 0.1986 (0.2563) loss: 0.6529 (0.6618) time: 0.1403 data: 0.0495 max mem: 9377 +Train: [92] [3200/6250] eta: 0:07:54 lr: 0.000002 grad: 0.1968 (0.2566) loss: 0.6453 (0.6616) time: 0.1653 data: 0.0808 max mem: 9377 +Train: [92] [3300/6250] eta: 0:07:37 lr: 0.000002 grad: 0.1937 (0.2562) loss: 0.6690 (0.6616) time: 0.1627 data: 0.0718 max mem: 9377 +Train: [92] [3400/6250] eta: 0:07:23 lr: 0.000002 grad: 0.1916 (0.2555) loss: 0.6569 (0.6615) time: 0.1735 data: 0.1011 max mem: 9377 +Train: [92] [3500/6250] eta: 0:07:08 lr: 0.000002 grad: 0.1960 (0.2547) loss: 0.6555 (0.6615) time: 0.1923 data: 0.1029 max mem: 9377 +Train: [92] [3600/6250] eta: 0:06:51 lr: 0.000002 grad: 0.1899 (0.2545) loss: 0.6663 (0.6616) time: 0.1397 data: 0.0512 max mem: 9377 +Train: [92] [3700/6250] eta: 0:06:35 lr: 0.000002 grad: 0.1990 (0.2549) loss: 0.6680 (0.6617) time: 0.1235 data: 0.0342 max mem: 9377 +Train: [92] [3800/6250] eta: 0:06:19 lr: 0.000002 grad: 0.1920 (0.2544) loss: 0.6645 (0.6617) time: 0.1306 data: 0.0346 max mem: 9377 +Train: [92] [3900/6250] eta: 0:06:03 lr: 0.000002 grad: 0.1978 (0.2544) loss: 0.6656 (0.6617) time: 0.1403 data: 0.0518 max mem: 9377 +Train: [92] [4000/6250] eta: 0:05:47 lr: 0.000002 grad: 0.1950 (0.2545) loss: 0.6606 (0.6619) time: 0.1474 data: 0.0603 max mem: 9377 +Train: [92] [4100/6250] eta: 0:05:32 lr: 0.000002 grad: 0.1905 (0.2537) loss: 0.6685 (0.6618) time: 0.1546 data: 0.0664 max mem: 9377 +Train: [92] [4200/6250] eta: 0:05:16 lr: 0.000002 grad: 0.1959 (0.2530) loss: 0.6583 (0.6618) time: 0.1511 data: 0.0680 max mem: 9377 +Train: [92] [4300/6250] eta: 0:05:00 lr: 0.000002 grad: 0.1976 (0.2528) loss: 0.6533 (0.6618) time: 0.1503 data: 0.0678 max mem: 9377 +Train: [92] [4400/6250] eta: 0:04:45 lr: 0.000002 grad: 0.1911 (0.2520) loss: 0.6526 (0.6618) time: 0.1205 data: 0.0372 max mem: 9377 +Train: [92] [4500/6250] eta: 0:04:29 lr: 0.000002 grad: 0.1961 (0.2516) loss: 0.6585 (0.6619) time: 0.1036 data: 0.0107 max mem: 9377 +Train: [92] [4600/6250] eta: 0:04:13 lr: 0.000002 grad: 0.1927 (0.2516) loss: 0.6610 (0.6619) time: 0.1671 data: 0.0888 max mem: 9377 +Train: [92] [4700/6250] eta: 0:03:58 lr: 0.000002 grad: 0.1919 (0.2513) loss: 0.6554 (0.6619) time: 0.1441 data: 0.0601 max mem: 9377 +Train: [92] [4800/6250] eta: 0:03:42 lr: 0.000002 grad: 0.1897 (0.2507) loss: 0.6676 (0.6618) time: 0.1433 data: 0.0563 max mem: 9377 +Train: [92] [4900/6250] eta: 0:03:27 lr: 0.000002 grad: 0.1939 (0.2499) loss: 0.6572 (0.6619) time: 0.1582 data: 0.0751 max mem: 9377 +Train: [92] [5000/6250] eta: 0:03:11 lr: 0.000002 grad: 0.1950 (0.2501) loss: 0.6642 (0.6619) time: 0.1284 data: 0.0430 max mem: 9377 +Train: [92] [5100/6250] eta: 0:02:56 lr: 0.000002 grad: 0.1941 (0.2506) loss: 0.6570 (0.6620) time: 0.1645 data: 0.0830 max mem: 9377 +Train: [92] [5200/6250] eta: 0:02:41 lr: 0.000002 grad: 0.1941 (0.2503) loss: 0.6726 (0.6620) time: 0.1537 data: 0.0738 max mem: 9377 +Train: [92] [5300/6250] eta: 0:02:26 lr: 0.000002 grad: 0.1924 (0.2504) loss: 0.6712 (0.6620) time: 0.1637 data: 0.0774 max mem: 9377 +Train: [92] [5400/6250] eta: 0:02:10 lr: 0.000002 grad: 0.2053 (0.2505) loss: 0.6582 (0.6621) time: 0.1384 data: 0.0647 max mem: 9377 +Train: [92] [5500/6250] eta: 0:01:55 lr: 0.000002 grad: 0.1937 (0.2505) loss: 0.6578 (0.6621) time: 0.1635 data: 0.0826 max mem: 9377 +Train: [92] [5600/6250] eta: 0:01:39 lr: 0.000002 grad: 0.1961 (0.2503) loss: 0.6622 (0.6621) time: 0.1585 data: 0.0705 max mem: 9377 +Train: [92] [5700/6250] eta: 0:01:24 lr: 0.000002 grad: 0.1985 (0.2506) loss: 0.6664 (0.6621) time: 0.1360 data: 0.0518 max mem: 9377 +Train: [92] [5800/6250] eta: 0:01:09 lr: 0.000002 grad: 0.1940 (0.2499) loss: 0.6639 (0.6621) time: 0.1409 data: 0.0553 max mem: 9377 +Train: [92] [5900/6250] eta: 0:00:53 lr: 0.000002 grad: 0.1946 (0.2499) loss: 0.6667 (0.6621) time: 0.1468 data: 0.0557 max mem: 9377 +Train: [92] [6000/6250] eta: 0:00:38 lr: 0.000002 grad: 0.1947 (0.2495) loss: 0.6536 (0.6620) time: 0.1498 data: 0.0533 max mem: 9377 +Train: [92] [6100/6250] eta: 0:00:22 lr: 0.000002 grad: 0.1925 (0.2490) loss: 0.6591 (0.6620) time: 0.1669 data: 0.0812 max mem: 9377 +Train: [92] [6200/6250] eta: 0:00:07 lr: 0.000002 grad: 0.1985 (0.2490) loss: 0.6476 (0.6619) time: 0.1751 data: 0.0909 max mem: 9377 +Train: [92] [6249/6250] eta: 0:00:00 lr: 0.000002 grad: 0.1941 (0.2490) loss: 0.6561 (0.6619) time: 0.1077 data: 0.0208 max mem: 9377 +Train: [92] Total time: 0:16:03 (0.1542 s / it) +Averaged stats: lr: 0.000002 grad: 0.1941 (0.2490) loss: 0.6561 (0.6619) +Eval (hcp-train-subset): [92] [ 0/62] eta: 0:05:20 loss: 0.9013 (0.9013) time: 5.1652 data: 5.1339 max mem: 9377 +Eval (hcp-train-subset): [92] [61/62] eta: 0:00:00 loss: 0.9125 (0.9131) time: 0.1208 data: 0.0956 max mem: 9377 +Eval (hcp-train-subset): [92] Total time: 0:00:13 (0.2198 s / it) +Averaged stats (hcp-train-subset): loss: 0.9125 (0.9131) +Eval (hcp-val): [92] [ 0/62] eta: 0:03:55 loss: 0.9073 (0.9073) time: 3.7942 data: 3.7010 max mem: 9377 +Eval (hcp-val): [92] [61/62] eta: 0:00:00 loss: 0.9102 (0.9123) time: 0.1425 data: 0.1170 max mem: 9377 +Eval (hcp-val): [92] Total time: 0:00:14 (0.2345 s / it) +Averaged stats (hcp-val): loss: 0.9102 (0.9123) +Eval (nsd-val): [92] [ 0/62] eta: 0:05:36 loss: 0.9192 (0.9192) time: 5.4252 data: 5.3937 max mem: 9377 +Eval (nsd-val): [92] [61/62] eta: 0:00:00 loss: 0.9143 (0.9182) time: 0.1434 data: 0.1182 max mem: 9377 +Eval (nsd-val): [92] Total time: 0:00:14 (0.2307 s / it) +Averaged stats (nsd-val): loss: 0.9143 (0.9182) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [93] [ 0/6250] eta: 11:14:57 lr: 0.000002 grad: 0.2166 (0.2166) loss: 0.7637 (0.7637) time: 6.4796 data: 6.3415 max mem: 9377 +Train: [93] [ 100/6250] eta: 0:22:11 lr: 0.000002 grad: 0.2063 (0.2807) loss: 0.6493 (0.6784) time: 0.1610 data: 0.0559 max mem: 9377 +Train: [93] [ 200/6250] eta: 0:19:03 lr: 0.000002 grad: 0.2055 (0.2673) loss: 0.6511 (0.6656) time: 0.1377 data: 0.0220 max mem: 9377 +Train: [93] [ 300/6250] eta: 0:17:38 lr: 0.000002 grad: 0.1932 (0.2554) loss: 0.6770 (0.6640) time: 0.1514 data: 0.0617 max mem: 9377 +Train: [93] [ 400/6250] eta: 0:16:51 lr: 0.000002 grad: 0.1954 (0.2549) loss: 0.6681 (0.6645) time: 0.1809 data: 0.0954 max mem: 9377 +Train: [93] [ 500/6250] eta: 0:16:00 lr: 0.000002 grad: 0.1933 (0.2511) loss: 0.6765 (0.6655) time: 0.1422 data: 0.0481 max mem: 9377 +Train: [93] [ 600/6250] eta: 0:15:24 lr: 0.000002 grad: 0.1994 (0.2491) loss: 0.6669 (0.6674) time: 0.1340 data: 0.0455 max mem: 9377 +Train: [93] [ 700/6250] eta: 0:14:56 lr: 0.000002 grad: 0.1906 (0.2474) loss: 0.6827 (0.6689) time: 0.1497 data: 0.0564 max mem: 9377 +Train: [93] [ 800/6250] eta: 0:14:32 lr: 0.000002 grad: 0.1990 (0.2465) loss: 0.6711 (0.6696) time: 0.1578 data: 0.0705 max mem: 9377 +Train: [93] [ 900/6250] eta: 0:14:08 lr: 0.000002 grad: 0.2071 (0.2474) loss: 0.6648 (0.6698) time: 0.1318 data: 0.0389 max mem: 9377 +Train: [93] [1000/6250] eta: 0:13:47 lr: 0.000002 grad: 0.1968 (0.2462) loss: 0.6857 (0.6704) time: 0.1314 data: 0.0318 max mem: 9377 +Train: [93] [1100/6250] eta: 0:13:32 lr: 0.000002 grad: 0.1973 (0.2480) loss: 0.6602 (0.6703) time: 0.1388 data: 0.0489 max mem: 9377 +Train: [93] [1200/6250] eta: 0:13:20 lr: 0.000002 grad: 0.1936 (0.2492) loss: 0.6565 (0.6702) time: 0.1741 data: 0.0885 max mem: 9377 +Train: [93] [1300/6250] eta: 0:13:05 lr: 0.000002 grad: 0.1977 (0.2489) loss: 0.6646 (0.6700) time: 0.1473 data: 0.0579 max mem: 9377 +Train: [93] [1400/6250] eta: 0:12:49 lr: 0.000002 grad: 0.1929 (0.2506) loss: 0.6686 (0.6700) time: 0.1729 data: 0.0839 max mem: 9377 +Train: [93] [1500/6250] eta: 0:12:35 lr: 0.000002 grad: 0.1960 (0.2517) loss: 0.6676 (0.6697) time: 0.1512 data: 0.0595 max mem: 9377 +Train: [93] [1600/6250] eta: 0:12:18 lr: 0.000002 grad: 0.1954 (0.2514) loss: 0.6685 (0.6697) time: 0.1500 data: 0.0591 max mem: 9377 +Train: [93] [1700/6250] eta: 0:11:57 lr: 0.000002 grad: 0.1993 (0.2499) loss: 0.6546 (0.6697) time: 0.1433 data: 0.0476 max mem: 9377 +Train: [93] [1800/6250] eta: 0:11:38 lr: 0.000002 grad: 0.1934 (0.2490) loss: 0.6715 (0.6698) time: 0.1647 data: 0.0770 max mem: 9377 +Train: [93] [1900/6250] eta: 0:11:20 lr: 0.000002 grad: 0.1927 (0.2500) loss: 0.6744 (0.6699) time: 0.1523 data: 0.0670 max mem: 9377 +Train: [93] [2000/6250] eta: 0:11:01 lr: 0.000002 grad: 0.1914 (0.2507) loss: 0.6558 (0.6698) time: 0.1479 data: 0.0596 max mem: 9377 +Train: [93] [2100/6250] eta: 0:10:45 lr: 0.000002 grad: 0.1959 (0.2520) loss: 0.6753 (0.6699) time: 0.1324 data: 0.0438 max mem: 9377 +Train: [93] [2200/6250] eta: 0:10:27 lr: 0.000002 grad: 0.1914 (0.2511) loss: 0.6654 (0.6702) time: 0.1470 data: 0.0636 max mem: 9377 +Train: [93] [2300/6250] eta: 0:10:11 lr: 0.000001 grad: 0.1975 (0.2517) loss: 0.6591 (0.6702) time: 0.1576 data: 0.0734 max mem: 9377 +Train: [93] [2400/6250] eta: 0:09:55 lr: 0.000001 grad: 0.1944 (0.2532) loss: 0.6589 (0.6701) time: 0.1534 data: 0.0703 max mem: 9377 +Train: [93] [2500/6250] eta: 0:09:38 lr: 0.000001 grad: 0.1950 (0.2520) loss: 0.6586 (0.6699) time: 0.1531 data: 0.0699 max mem: 9377 +Train: [93] [2600/6250] eta: 0:09:21 lr: 0.000001 grad: 0.1956 (0.2515) loss: 0.6602 (0.6700) time: 0.1324 data: 0.0457 max mem: 9377 +Train: [93] [2700/6250] eta: 0:09:06 lr: 0.000001 grad: 0.1931 (0.2511) loss: 0.6700 (0.6700) time: 0.1650 data: 0.0821 max mem: 9377 +Train: [93] [2800/6250] eta: 0:08:49 lr: 0.000001 grad: 0.1936 (0.2507) loss: 0.6694 (0.6699) time: 0.1500 data: 0.0597 max mem: 9377 +Train: [93] [2900/6250] eta: 0:08:33 lr: 0.000001 grad: 0.1979 (0.2500) loss: 0.6588 (0.6698) time: 0.1551 data: 0.0676 max mem: 9377 +Train: [93] [3000/6250] eta: 0:08:19 lr: 0.000001 grad: 0.2008 (0.2491) loss: 0.6681 (0.6696) time: 0.1434 data: 0.0547 max mem: 9377 +Train: [93] [3100/6250] eta: 0:08:04 lr: 0.000001 grad: 0.1966 (0.2496) loss: 0.6533 (0.6694) time: 0.1270 data: 0.0375 max mem: 9377 +Train: [93] [3200/6250] eta: 0:07:48 lr: 0.000001 grad: 0.1958 (0.2499) loss: 0.6565 (0.6691) time: 0.1579 data: 0.0707 max mem: 9377 +Train: [93] [3300/6250] eta: 0:07:34 lr: 0.000001 grad: 0.1971 (0.2494) loss: 0.6423 (0.6687) time: 0.1704 data: 0.0832 max mem: 9377 +Train: [93] [3400/6250] eta: 0:07:19 lr: 0.000001 grad: 0.2315 (0.2495) loss: 0.6570 (0.6683) time: 0.1350 data: 0.0525 max mem: 9377 +Train: [93] [3500/6250] eta: 0:07:03 lr: 0.000001 grad: 0.2002 (0.2495) loss: 0.6527 (0.6679) time: 0.1476 data: 0.0516 max mem: 9377 +Train: [93] [3600/6250] eta: 0:06:48 lr: 0.000001 grad: 0.1972 (0.2487) loss: 0.6595 (0.6677) time: 0.1474 data: 0.0527 max mem: 9377 +Train: [93] [3700/6250] eta: 0:06:32 lr: 0.000001 grad: 0.1968 (0.2483) loss: 0.6582 (0.6674) time: 0.1319 data: 0.0441 max mem: 9377 +Train: [93] [3800/6250] eta: 0:06:16 lr: 0.000001 grad: 0.1984 (0.2479) loss: 0.6567 (0.6672) time: 0.1538 data: 0.0700 max mem: 9377 +Train: [93] [3900/6250] eta: 0:06:00 lr: 0.000001 grad: 0.2017 (0.2483) loss: 0.6685 (0.6671) time: 0.1523 data: 0.0699 max mem: 9377 +Train: [93] [4000/6250] eta: 0:05:45 lr: 0.000001 grad: 0.1928 (0.2488) loss: 0.6668 (0.6671) time: 0.1501 data: 0.0643 max mem: 9377 +Train: [93] [4100/6250] eta: 0:05:29 lr: 0.000001 grad: 0.1943 (0.2484) loss: 0.6605 (0.6671) time: 0.1284 data: 0.0433 max mem: 9377 +Train: [93] [4200/6250] eta: 0:05:13 lr: 0.000001 grad: 0.1908 (0.2483) loss: 0.6743 (0.6670) time: 0.1639 data: 0.0814 max mem: 9377 +Train: [93] [4300/6250] eta: 0:04:58 lr: 0.000001 grad: 0.1973 (0.2478) loss: 0.6621 (0.6670) time: 0.1339 data: 0.0577 max mem: 9377 +Train: [93] [4400/6250] eta: 0:04:42 lr: 0.000001 grad: 0.1938 (0.2489) loss: 0.6672 (0.6669) time: 0.1507 data: 0.0696 max mem: 9377 +Train: [93] [4500/6250] eta: 0:04:26 lr: 0.000001 grad: 0.1904 (0.2486) loss: 0.6672 (0.6668) time: 0.1194 data: 0.0286 max mem: 9377 +Train: [93] [4600/6250] eta: 0:04:11 lr: 0.000001 grad: 0.1934 (0.2488) loss: 0.6772 (0.6669) time: 0.1496 data: 0.0597 max mem: 9377 +Train: [93] [4700/6250] eta: 0:03:56 lr: 0.000001 grad: 0.1900 (0.2487) loss: 0.6650 (0.6668) time: 0.1543 data: 0.0633 max mem: 9377 +Train: [93] [4800/6250] eta: 0:03:40 lr: 0.000001 grad: 0.2002 (0.2484) loss: 0.6667 (0.6667) time: 0.1420 data: 0.0546 max mem: 9377 +Train: [93] [4900/6250] eta: 0:03:25 lr: 0.000001 grad: 0.1938 (0.2488) loss: 0.6728 (0.6667) time: 0.1575 data: 0.0729 max mem: 9377 +Train: [93] [5000/6250] eta: 0:03:09 lr: 0.000001 grad: 0.1979 (0.2484) loss: 0.6522 (0.6667) time: 0.1577 data: 0.0703 max mem: 9377 +Train: [93] [5100/6250] eta: 0:02:54 lr: 0.000001 grad: 0.1970 (0.2477) loss: 0.6522 (0.6666) time: 0.1575 data: 0.0685 max mem: 9377 +Train: [93] [5200/6250] eta: 0:02:39 lr: 0.000001 grad: 0.1939 (0.2476) loss: 0.6655 (0.6666) time: 0.1621 data: 0.0828 max mem: 9377 +Train: [93] [5300/6250] eta: 0:02:24 lr: 0.000001 grad: 0.1936 (0.2470) loss: 0.6636 (0.6667) time: 0.1869 data: 0.0972 max mem: 9377 +Train: [93] [5400/6250] eta: 0:02:09 lr: 0.000001 grad: 0.1946 (0.2472) loss: 0.6466 (0.6666) time: 0.1605 data: 0.0785 max mem: 9377 +Train: [93] [5500/6250] eta: 0:01:54 lr: 0.000001 grad: 0.1904 (0.2469) loss: 0.6569 (0.6667) time: 0.1518 data: 0.0643 max mem: 9377 +Train: [93] [5600/6250] eta: 0:01:38 lr: 0.000001 grad: 0.1875 (0.2463) loss: 0.6754 (0.6668) time: 0.1460 data: 0.0564 max mem: 9377 +Train: [93] [5700/6250] eta: 0:01:23 lr: 0.000001 grad: 0.1888 (0.2461) loss: 0.6703 (0.6668) time: 0.1603 data: 0.0670 max mem: 9377 +Train: [93] [5800/6250] eta: 0:01:08 lr: 0.000001 grad: 0.2048 (0.2461) loss: 0.6506 (0.6668) time: 0.1180 data: 0.0332 max mem: 9377 +Train: [93] [5900/6250] eta: 0:00:53 lr: 0.000001 grad: 0.1939 (0.2465) loss: 0.6654 (0.6667) time: 0.1435 data: 0.0537 max mem: 9377 +Train: [93] [6000/6250] eta: 0:00:37 lr: 0.000001 grad: 0.1913 (0.2463) loss: 0.6751 (0.6667) time: 0.1273 data: 0.0287 max mem: 9377 +Train: [93] [6100/6250] eta: 0:00:22 lr: 0.000001 grad: 0.1926 (0.2471) loss: 0.6895 (0.6667) time: 0.1489 data: 0.0695 max mem: 9377 +Train: [93] [6200/6250] eta: 0:00:07 lr: 0.000001 grad: 0.1930 (0.2474) loss: 0.6680 (0.6667) time: 0.1565 data: 0.0699 max mem: 9377 +Train: [93] [6249/6250] eta: 0:00:00 lr: 0.000001 grad: 0.1948 (0.2475) loss: 0.6623 (0.6667) time: 0.1496 data: 0.0591 max mem: 9377 +Train: [93] Total time: 0:15:54 (0.1527 s / it) +Averaged stats: lr: 0.000001 grad: 0.1948 (0.2475) loss: 0.6623 (0.6667) +Eval (hcp-train-subset): [93] [ 0/62] eta: 0:04:27 loss: 0.9006 (0.9006) time: 4.3170 data: 4.2217 max mem: 9377 +Eval (hcp-train-subset): [93] [61/62] eta: 0:00:00 loss: 0.9125 (0.9126) time: 0.1230 data: 0.0978 max mem: 9377 +Eval (hcp-train-subset): [93] Total time: 0:00:14 (0.2324 s / it) +Averaged stats (hcp-train-subset): loss: 0.9125 (0.9126) +Eval (hcp-val): [93] [ 0/62] eta: 0:05:30 loss: 0.9117 (0.9117) time: 5.3345 data: 5.3027 max mem: 9377 +Eval (hcp-val): [93] [61/62] eta: 0:00:00 loss: 0.9134 (0.9131) time: 0.1424 data: 0.1169 max mem: 9377 +Eval (hcp-val): [93] Total time: 0:00:14 (0.2286 s / it) +Averaged stats (hcp-val): loss: 0.9134 (0.9131) +Eval (nsd-val): [93] [ 0/62] eta: 0:03:54 loss: 0.9156 (0.9156) time: 3.7892 data: 3.6932 max mem: 9377 +Eval (nsd-val): [93] [61/62] eta: 0:00:00 loss: 0.9153 (0.9191) time: 0.1275 data: 0.1021 max mem: 9377 +Eval (nsd-val): [93] Total time: 0:00:13 (0.2239 s / it) +Averaged stats (nsd-val): loss: 0.9153 (0.9191) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [94] [ 0/6250] eta: 11:38:03 lr: 0.000001 grad: 0.2453 (0.2453) loss: 0.6239 (0.6239) time: 6.7014 data: 6.5425 max mem: 9377 +Train: [94] [ 100/6250] eta: 0:22:15 lr: 0.000001 grad: 0.2036 (0.2417) loss: 0.6851 (0.7030) time: 0.1585 data: 0.0616 max mem: 9377 +Train: [94] [ 200/6250] eta: 0:18:49 lr: 0.000001 grad: 0.2057 (0.2392) loss: 0.6414 (0.6854) time: 0.1505 data: 0.0555 max mem: 9377 +Train: [94] [ 300/6250] eta: 0:17:22 lr: 0.000001 grad: 0.2020 (0.2339) loss: 0.6602 (0.6769) time: 0.1690 data: 0.0621 max mem: 9377 +Train: [94] [ 400/6250] eta: 0:16:22 lr: 0.000001 grad: 0.2162 (0.2487) loss: 0.6607 (0.6725) time: 0.1473 data: 0.0475 max mem: 9377 +Train: [94] [ 500/6250] eta: 0:15:49 lr: 0.000001 grad: 0.2077 (0.2471) loss: 0.6649 (0.6702) time: 0.1585 data: 0.0603 max mem: 9377 +Train: [94] [ 600/6250] eta: 0:15:21 lr: 0.000001 grad: 0.1948 (0.2466) loss: 0.6521 (0.6688) time: 0.1575 data: 0.0656 max mem: 9377 +Train: [94] [ 700/6250] eta: 0:14:56 lr: 0.000001 grad: 0.2014 (0.2445) loss: 0.6442 (0.6674) time: 0.1481 data: 0.0509 max mem: 9377 +Train: [94] [ 800/6250] eta: 0:14:40 lr: 0.000001 grad: 0.1976 (0.2445) loss: 0.6716 (0.6672) time: 0.1739 data: 0.0827 max mem: 9377 +Train: [94] [ 900/6250] eta: 0:14:18 lr: 0.000001 grad: 0.1988 (0.2464) loss: 0.6385 (0.6659) time: 0.1540 data: 0.0539 max mem: 9377 +Train: [94] [1000/6250] eta: 0:14:02 lr: 0.000001 grad: 0.1938 (0.2434) loss: 0.6592 (0.6649) time: 0.1191 data: 0.0305 max mem: 9377 +Train: [94] [1100/6250] eta: 0:13:47 lr: 0.000001 grad: 0.1905 (0.2418) loss: 0.6696 (0.6644) time: 0.1677 data: 0.0757 max mem: 9377 +Train: [94] [1200/6250] eta: 0:13:30 lr: 0.000001 grad: 0.1928 (0.2420) loss: 0.6559 (0.6633) time: 0.1644 data: 0.0759 max mem: 9377 +Train: [94] [1300/6250] eta: 0:13:15 lr: 0.000001 grad: 0.2005 (0.2404) loss: 0.6629 (0.6629) time: 0.1628 data: 0.0844 max mem: 9377 +Train: [94] [1400/6250] eta: 0:13:01 lr: 0.000001 grad: 0.1979 (0.2403) loss: 0.6576 (0.6626) time: 0.1586 data: 0.0725 max mem: 9377 +Train: [94] [1500/6250] eta: 0:12:44 lr: 0.000001 grad: 0.2087 (0.2415) loss: 0.6532 (0.6627) time: 0.1587 data: 0.0793 max mem: 9377 +Train: [94] [1600/6250] eta: 0:12:24 lr: 0.000001 grad: 0.1958 (0.2412) loss: 0.6757 (0.6628) time: 0.1580 data: 0.0614 max mem: 9377 +Train: [94] [1700/6250] eta: 0:12:03 lr: 0.000001 grad: 0.1942 (0.2412) loss: 0.6643 (0.6629) time: 0.1487 data: 0.0571 max mem: 9377 +Train: [94] [1800/6250] eta: 0:11:44 lr: 0.000001 grad: 0.1967 (0.2398) loss: 0.6618 (0.6631) time: 0.1509 data: 0.0611 max mem: 9377 +Train: [94] [1900/6250] eta: 0:11:25 lr: 0.000001 grad: 0.1892 (0.2407) loss: 0.6701 (0.6633) time: 0.1409 data: 0.0523 max mem: 9377 +Train: [94] [2000/6250] eta: 0:11:09 lr: 0.000001 grad: 0.1944 (0.2410) loss: 0.6637 (0.6634) time: 0.1731 data: 0.0898 max mem: 9377 +Train: [94] [2100/6250] eta: 0:10:50 lr: 0.000001 grad: 0.1932 (0.2404) loss: 0.6632 (0.6636) time: 0.1497 data: 0.0602 max mem: 9377 +Train: [94] [2200/6250] eta: 0:10:33 lr: 0.000001 grad: 0.2018 (0.2411) loss: 0.6569 (0.6635) time: 0.1423 data: 0.0544 max mem: 9377 +Train: [94] [2300/6250] eta: 0:10:17 lr: 0.000001 grad: 0.1925 (0.2421) loss: 0.6660 (0.6634) time: 0.1521 data: 0.0758 max mem: 9377 +Train: [94] [2400/6250] eta: 0:10:00 lr: 0.000001 grad: 0.1997 (0.2437) loss: 0.6594 (0.6633) time: 0.1454 data: 0.0523 max mem: 9377 +Train: [94] [2500/6250] eta: 0:09:43 lr: 0.000001 grad: 0.1948 (0.2441) loss: 0.6713 (0.6632) time: 0.1466 data: 0.0589 max mem: 9377 +Train: [94] [2600/6250] eta: 0:09:27 lr: 0.000001 grad: 0.1924 (0.2430) loss: 0.6561 (0.6629) time: 0.1501 data: 0.0627 max mem: 9377 +Train: [94] [2700/6250] eta: 0:09:11 lr: 0.000001 grad: 0.1982 (0.2439) loss: 0.6488 (0.6626) time: 0.1440 data: 0.0531 max mem: 9377 +Train: [94] [2800/6250] eta: 0:08:54 lr: 0.000001 grad: 0.1981 (0.2441) loss: 0.6422 (0.6623) time: 0.1427 data: 0.0580 max mem: 9377 +Train: [94] [2900/6250] eta: 0:08:39 lr: 0.000001 grad: 0.1923 (0.2435) loss: 0.6505 (0.6621) time: 0.1788 data: 0.1065 max mem: 9377 +Train: [94] [3000/6250] eta: 0:08:24 lr: 0.000001 grad: 0.2017 (0.2447) loss: 0.6665 (0.6619) time: 0.1671 data: 0.0809 max mem: 9377 +Train: [94] [3100/6250] eta: 0:08:07 lr: 0.000001 grad: 0.1955 (0.2452) loss: 0.6494 (0.6619) time: 0.1456 data: 0.0698 max mem: 9377 +Train: [94] [3200/6250] eta: 0:07:52 lr: 0.000001 grad: 0.1971 (0.2445) loss: 0.6692 (0.6620) time: 0.1466 data: 0.0651 max mem: 9377 +Train: [94] [3300/6250] eta: 0:07:37 lr: 0.000001 grad: 0.2004 (0.2450) loss: 0.6657 (0.6621) time: 0.1726 data: 0.0860 max mem: 9377 +Train: [94] [3400/6250] eta: 0:07:23 lr: 0.000001 grad: 0.1957 (0.2454) loss: 0.6574 (0.6621) time: 0.1781 data: 0.0919 max mem: 9377 +Train: [94] [3500/6250] eta: 0:07:07 lr: 0.000001 grad: 0.1907 (0.2448) loss: 0.6593 (0.6622) time: 0.1423 data: 0.0561 max mem: 9377 +Train: [94] [3600/6250] eta: 0:06:51 lr: 0.000001 grad: 0.1945 (0.2453) loss: 0.6571 (0.6623) time: 0.1464 data: 0.0478 max mem: 9377 +Train: [94] [3700/6250] eta: 0:06:35 lr: 0.000001 grad: 0.1982 (0.2448) loss: 0.6681 (0.6624) time: 0.1160 data: 0.0280 max mem: 9377 +Train: [94] [3800/6250] eta: 0:06:18 lr: 0.000001 grad: 0.1941 (0.2450) loss: 0.6743 (0.6626) time: 0.1493 data: 0.0617 max mem: 9377 +Train: [94] [3900/6250] eta: 0:06:02 lr: 0.000001 grad: 0.1889 (0.2455) loss: 0.6716 (0.6628) time: 0.1383 data: 0.0540 max mem: 9377 +Train: [94] [4000/6250] eta: 0:05:47 lr: 0.000001 grad: 0.1962 (0.2451) loss: 0.6634 (0.6630) time: 0.1604 data: 0.0757 max mem: 9377 +Train: [94] [4100/6250] eta: 0:05:31 lr: 0.000001 grad: 0.1981 (0.2454) loss: 0.6617 (0.6630) time: 0.1459 data: 0.0570 max mem: 9377 +Train: [94] [4200/6250] eta: 0:05:15 lr: 0.000001 grad: 0.1970 (0.2457) loss: 0.6538 (0.6631) time: 0.1480 data: 0.0670 max mem: 9377 +Train: [94] [4300/6250] eta: 0:04:59 lr: 0.000001 grad: 0.1951 (0.2458) loss: 0.6508 (0.6631) time: 0.1114 data: 0.0189 max mem: 9377 +Train: [94] [4400/6250] eta: 0:04:43 lr: 0.000001 grad: 0.1936 (0.2456) loss: 0.6680 (0.6633) time: 0.1459 data: 0.0583 max mem: 9377 +Train: [94] [4500/6250] eta: 0:04:28 lr: 0.000001 grad: 0.1929 (0.2453) loss: 0.6603 (0.6633) time: 0.1366 data: 0.0522 max mem: 9377 +Train: [94] [4600/6250] eta: 0:04:12 lr: 0.000001 grad: 0.2020 (0.2450) loss: 0.6480 (0.6632) time: 0.1380 data: 0.0500 max mem: 9377 +Train: [94] [4700/6250] eta: 0:03:57 lr: 0.000001 grad: 0.1972 (0.2445) loss: 0.6665 (0.6631) time: 0.1648 data: 0.0755 max mem: 9377 +Train: [94] [4800/6250] eta: 0:03:42 lr: 0.000001 grad: 0.1970 (0.2449) loss: 0.6760 (0.6631) time: 0.1456 data: 0.0535 max mem: 9377 +Train: [94] [4900/6250] eta: 0:03:26 lr: 0.000001 grad: 0.1927 (0.2448) loss: 0.6501 (0.6631) time: 0.1398 data: 0.0548 max mem: 9377 +Train: [94] [5000/6250] eta: 0:03:11 lr: 0.000001 grad: 0.1987 (0.2455) loss: 0.6626 (0.6631) time: 0.1397 data: 0.0505 max mem: 9377 +Train: [94] [5100/6250] eta: 0:02:55 lr: 0.000001 grad: 0.1967 (0.2455) loss: 0.6568 (0.6630) time: 0.1488 data: 0.0595 max mem: 9377 +Train: [94] [5200/6250] eta: 0:02:40 lr: 0.000001 grad: 0.1898 (0.2452) loss: 0.6610 (0.6630) time: 0.1687 data: 0.0780 max mem: 9377 +Train: [94] [5300/6250] eta: 0:02:25 lr: 0.000001 grad: 0.1994 (0.2448) loss: 0.6516 (0.6629) time: 0.1610 data: 0.0711 max mem: 9377 +Train: [94] [5400/6250] eta: 0:02:10 lr: 0.000001 grad: 0.1936 (0.2451) loss: 0.6634 (0.6629) time: 0.1552 data: 0.0706 max mem: 9377 +Train: [94] [5500/6250] eta: 0:01:55 lr: 0.000001 grad: 0.1959 (0.2453) loss: 0.6573 (0.6628) time: 0.1494 data: 0.0533 max mem: 9377 +Train: [94] [5600/6250] eta: 0:01:39 lr: 0.000001 grad: 0.1932 (0.2457) loss: 0.6660 (0.6627) time: 0.1616 data: 0.0741 max mem: 9377 +Train: [94] [5700/6250] eta: 0:01:24 lr: 0.000001 grad: 0.1974 (0.2457) loss: 0.6668 (0.6628) time: 0.1458 data: 0.0560 max mem: 9377 +Train: [94] [5800/6250] eta: 0:01:09 lr: 0.000001 grad: 0.2000 (0.2461) loss: 0.6626 (0.6627) time: 0.1547 data: 0.0555 max mem: 9377 +Train: [94] [5900/6250] eta: 0:00:53 lr: 0.000001 grad: 0.1952 (0.2464) loss: 0.6593 (0.6627) time: 0.1732 data: 0.0925 max mem: 9377 +Train: [94] [6000/6250] eta: 0:00:38 lr: 0.000001 grad: 0.1984 (0.2462) loss: 0.6685 (0.6627) time: 0.1439 data: 0.0476 max mem: 9377 +Train: [94] [6100/6250] eta: 0:00:22 lr: 0.000001 grad: 0.1925 (0.2460) loss: 0.6629 (0.6627) time: 0.1544 data: 0.0653 max mem: 9377 +Train: [94] [6200/6250] eta: 0:00:07 lr: 0.000001 grad: 0.1982 (0.2461) loss: 0.6528 (0.6627) time: 0.1311 data: 0.0460 max mem: 9377 +Train: [94] [6249/6250] eta: 0:00:00 lr: 0.000001 grad: 0.1938 (0.2463) loss: 0.6548 (0.6627) time: 0.1203 data: 0.0374 max mem: 9377 +Train: [94] Total time: 0:16:00 (0.1536 s / it) +Averaged stats: lr: 0.000001 grad: 0.1938 (0.2463) loss: 0.6548 (0.6627) +Eval (hcp-train-subset): [94] [ 0/62] eta: 0:05:05 loss: 0.9018 (0.9018) time: 4.9277 data: 4.8585 max mem: 9377 +Eval (hcp-train-subset): [94] [61/62] eta: 0:00:00 loss: 0.9101 (0.9130) time: 0.1252 data: 0.0983 max mem: 9377 +Eval (hcp-train-subset): [94] Total time: 0:00:14 (0.2261 s / it) +Averaged stats (hcp-train-subset): loss: 0.9101 (0.9130) +Making plots (hcp-train-subset): example=16 +Eval (hcp-val): [94] [ 0/62] eta: 0:06:28 loss: 0.9180 (0.9180) time: 6.2675 data: 6.2324 max mem: 9377 +Eval (hcp-val): [94] [61/62] eta: 0:00:00 loss: 0.9116 (0.9124) time: 0.1290 data: 0.0962 max mem: 9377 +Eval (hcp-val): [94] Total time: 0:00:14 (0.2362 s / it) +Averaged stats (hcp-val): loss: 0.9116 (0.9124) +Making plots (hcp-val): example=51 +Eval (nsd-val): [94] [ 0/62] eta: 0:05:18 loss: 0.9134 (0.9134) time: 5.1314 data: 5.0917 max mem: 9377 +Eval (nsd-val): [94] [61/62] eta: 0:00:00 loss: 0.9153 (0.9173) time: 0.1419 data: 0.1162 max mem: 9377 +Eval (nsd-val): [94] Total time: 0:00:14 (0.2291 s / it) +Averaged stats (nsd-val): loss: 0.9153 (0.9173) +Making plots (nsd-val): example=41 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-00094.pth +Train: [95] [ 0/6250] eta: 10:27:16 lr: 0.000001 grad: 0.2209 (0.2209) loss: 0.7310 (0.7310) time: 6.0218 data: 5.7748 max mem: 9377 +Train: [95] [ 100/6250] eta: 0:22:47 lr: 0.000001 grad: 0.2128 (0.2664) loss: 0.6370 (0.6464) time: 0.1601 data: 0.0579 max mem: 9377 +Train: [95] [ 200/6250] eta: 0:18:51 lr: 0.000001 grad: 0.2002 (0.2448) loss: 0.6387 (0.6440) time: 0.1531 data: 0.0509 max mem: 9377 +Train: [95] [ 300/6250] eta: 0:17:25 lr: 0.000001 grad: 0.1956 (0.2495) loss: 0.6543 (0.6437) time: 0.1529 data: 0.0418 max mem: 9377 +Train: [95] [ 400/6250] eta: 0:16:26 lr: 0.000001 grad: 0.2012 (0.2440) loss: 0.6495 (0.6462) time: 0.1326 data: 0.0372 max mem: 9377 +Train: [95] [ 500/6250] eta: 0:15:52 lr: 0.000001 grad: 0.1966 (0.2452) loss: 0.6483 (0.6485) time: 0.1455 data: 0.0435 max mem: 9377 +Train: [95] [ 600/6250] eta: 0:15:22 lr: 0.000001 grad: 0.1911 (0.2437) loss: 0.6715 (0.6501) time: 0.1394 data: 0.0415 max mem: 9377 +Train: [95] [ 700/6250] eta: 0:14:51 lr: 0.000001 grad: 0.2003 (0.2433) loss: 0.6462 (0.6503) time: 0.1279 data: 0.0415 max mem: 9377 +Train: [95] [ 800/6250] eta: 0:14:31 lr: 0.000001 grad: 0.1970 (0.2456) loss: 0.6519 (0.6500) time: 0.1650 data: 0.0721 max mem: 9377 +Train: [95] [ 900/6250] eta: 0:14:12 lr: 0.000001 grad: 0.1960 (0.2448) loss: 0.6528 (0.6504) time: 0.1151 data: 0.0321 max mem: 9377 +Train: [95] [1000/6250] eta: 0:13:55 lr: 0.000001 grad: 0.1969 (0.2472) loss: 0.6600 (0.6509) time: 0.1744 data: 0.0754 max mem: 9377 +Train: [95] [1100/6250] eta: 0:13:35 lr: 0.000001 grad: 0.1945 (0.2484) loss: 0.6420 (0.6512) time: 0.1563 data: 0.0637 max mem: 9377 +Train: [95] [1200/6250] eta: 0:13:15 lr: 0.000001 grad: 0.1919 (0.2478) loss: 0.6474 (0.6515) time: 0.1316 data: 0.0475 max mem: 9377 +Train: [95] [1300/6250] eta: 0:12:57 lr: 0.000001 grad: 0.1961 (0.2500) loss: 0.6373 (0.6514) time: 0.1772 data: 0.0812 max mem: 9377 +Train: [95] [1400/6250] eta: 0:12:40 lr: 0.000001 grad: 0.1981 (0.2509) loss: 0.6339 (0.6510) time: 0.1415 data: 0.0583 max mem: 9377 +Train: [95] [1500/6250] eta: 0:12:21 lr: 0.000001 grad: 0.1944 (0.2496) loss: 0.6482 (0.6509) time: 0.1346 data: 0.0472 max mem: 9377 +Train: [95] [1600/6250] eta: 0:12:04 lr: 0.000001 grad: 0.1943 (0.2530) loss: 0.6620 (0.6509) time: 0.1613 data: 0.0700 max mem: 9377 +Train: [95] [1700/6250] eta: 0:11:45 lr: 0.000001 grad: 0.1915 (0.2533) loss: 0.6570 (0.6510) time: 0.1378 data: 0.0514 max mem: 9377 +Train: [95] [1800/6250] eta: 0:11:28 lr: 0.000001 grad: 0.1950 (0.2528) loss: 0.6447 (0.6509) time: 0.1534 data: 0.0646 max mem: 9377 +Train: [95] [1900/6250] eta: 0:11:10 lr: 0.000001 grad: 0.1912 (0.2530) loss: 0.6638 (0.6510) time: 0.1522 data: 0.0673 max mem: 9377 +Train: [95] [2000/6250] eta: 0:10:53 lr: 0.000001 grad: 0.1932 (0.2518) loss: 0.6382 (0.6509) time: 0.1445 data: 0.0505 max mem: 9377 +Train: [95] [2100/6250] eta: 0:10:35 lr: 0.000001 grad: 0.2029 (0.2524) loss: 0.6525 (0.6509) time: 0.1504 data: 0.0694 max mem: 9377 +Train: [95] [2200/6250] eta: 0:10:19 lr: 0.000001 grad: 0.2012 (0.2538) loss: 0.6581 (0.6511) time: 0.1638 data: 0.0787 max mem: 9377 +Train: [95] [2300/6250] eta: 0:10:01 lr: 0.000001 grad: 0.1923 (0.2529) loss: 0.6587 (0.6511) time: 0.1399 data: 0.0483 max mem: 9377 +Train: [95] [2400/6250] eta: 0:09:44 lr: 0.000001 grad: 0.1954 (0.2520) loss: 0.6509 (0.6512) time: 0.1378 data: 0.0484 max mem: 9377 +Train: [95] [2500/6250] eta: 0:09:29 lr: 0.000001 grad: 0.1904 (0.2514) loss: 0.6603 (0.6514) time: 0.1580 data: 0.0696 max mem: 9377 +Train: [95] [2600/6250] eta: 0:09:13 lr: 0.000001 grad: 0.1992 (0.2512) loss: 0.6344 (0.6512) time: 0.1546 data: 0.0676 max mem: 9377 +Train: [95] [2700/6250] eta: 0:08:57 lr: 0.000001 grad: 0.1934 (0.2502) loss: 0.6519 (0.6512) time: 0.1718 data: 0.0821 max mem: 9377 +Train: [95] [2800/6250] eta: 0:08:40 lr: 0.000001 grad: 0.1953 (0.2495) loss: 0.6400 (0.6513) time: 0.1243 data: 0.0382 max mem: 9377 +Train: [95] [2900/6250] eta: 0:08:26 lr: 0.000001 grad: 0.1968 (0.2496) loss: 0.6553 (0.6512) time: 0.1528 data: 0.0664 max mem: 9377 +Train: [95] [3000/6250] eta: 0:08:11 lr: 0.000001 grad: 0.1971 (0.2502) loss: 0.6401 (0.6513) time: 0.1546 data: 0.0681 max mem: 9377 +Train: [95] [3100/6250] eta: 0:07:56 lr: 0.000001 grad: 0.2054 (0.2498) loss: 0.6501 (0.6514) time: 0.1651 data: 0.0833 max mem: 9377 +Train: [95] [3200/6250] eta: 0:07:41 lr: 0.000001 grad: 0.1909 (0.2509) loss: 0.6588 (0.6517) time: 0.1398 data: 0.0572 max mem: 9377 +Train: [95] [3300/6250] eta: 0:07:26 lr: 0.000001 grad: 0.1919 (0.2519) loss: 0.6662 (0.6519) time: 0.1568 data: 0.0704 max mem: 9377 +Train: [95] [3400/6250] eta: 0:07:11 lr: 0.000001 grad: 0.1953 (0.2526) loss: 0.6682 (0.6522) time: 0.1761 data: 0.0990 max mem: 9377 +Train: [95] [3500/6250] eta: 0:06:56 lr: 0.000001 grad: 0.1889 (0.2519) loss: 0.6685 (0.6526) time: 0.1482 data: 0.0665 max mem: 9377 +Train: [95] [3600/6250] eta: 0:06:40 lr: 0.000001 grad: 0.1957 (0.2515) loss: 0.6732 (0.6530) time: 0.1353 data: 0.0471 max mem: 9377 +Train: [95] [3700/6250] eta: 0:06:24 lr: 0.000001 grad: 0.1921 (0.2509) loss: 0.6533 (0.6533) time: 0.1475 data: 0.0604 max mem: 9377 +Train: [95] [3800/6250] eta: 0:06:09 lr: 0.000001 grad: 0.1955 (0.2505) loss: 0.6646 (0.6537) time: 0.1579 data: 0.0703 max mem: 9377 +Train: [95] [3900/6250] eta: 0:05:53 lr: 0.000001 grad: 0.1936 (0.2510) loss: 0.6594 (0.6541) time: 0.1291 data: 0.0376 max mem: 9377 +Train: [95] [4000/6250] eta: 0:05:38 lr: 0.000001 grad: 0.1939 (0.2511) loss: 0.6764 (0.6546) time: 0.1656 data: 0.0780 max mem: 9377 +Train: [95] [4100/6250] eta: 0:05:23 lr: 0.000001 grad: 0.2019 (0.2510) loss: 0.6572 (0.6549) time: 0.1427 data: 0.0547 max mem: 9377 +Train: [95] [4200/6250] eta: 0:05:07 lr: 0.000001 grad: 0.2015 (0.2505) loss: 0.6805 (0.6552) time: 0.1483 data: 0.0671 max mem: 9377 +Train: [95] [4300/6250] eta: 0:04:52 lr: 0.000001 grad: 0.1950 (0.2506) loss: 0.6768 (0.6556) time: 0.1583 data: 0.0777 max mem: 9377 +Train: [95] [4400/6250] eta: 0:04:37 lr: 0.000001 grad: 0.1917 (0.2515) loss: 0.6806 (0.6560) time: 0.1343 data: 0.0425 max mem: 9377 +Train: [95] [4500/6250] eta: 0:04:21 lr: 0.000001 grad: 0.1936 (0.2508) loss: 0.6756 (0.6563) time: 0.1289 data: 0.0402 max mem: 9377 +Train: [95] [4600/6250] eta: 0:04:06 lr: 0.000001 grad: 0.1970 (0.2509) loss: 0.6587 (0.6565) time: 0.1421 data: 0.0572 max mem: 9377 +Train: [95] [4700/6250] eta: 0:03:51 lr: 0.000001 grad: 0.1961 (0.2508) loss: 0.6715 (0.6568) time: 0.1580 data: 0.0670 max mem: 9377 +Train: [95] [4800/6250] eta: 0:03:36 lr: 0.000001 grad: 0.1934 (0.2510) loss: 0.6706 (0.6571) time: 0.1333 data: 0.0498 max mem: 9377 +Train: [95] [4900/6250] eta: 0:03:21 lr: 0.000001 grad: 0.1901 (0.2512) loss: 0.6779 (0.6573) time: 0.1470 data: 0.0618 max mem: 9377 +Train: [95] [5000/6250] eta: 0:03:05 lr: 0.000001 grad: 0.1938 (0.2517) loss: 0.6640 (0.6575) time: 0.1428 data: 0.0511 max mem: 9377 +Train: [95] [5100/6250] eta: 0:02:51 lr: 0.000001 grad: 0.1947 (0.2513) loss: 0.6686 (0.6577) time: 0.1529 data: 0.0694 max mem: 9377 +Train: [95] [5200/6250] eta: 0:02:36 lr: 0.000001 grad: 0.1982 (0.2513) loss: 0.6736 (0.6579) time: 0.1626 data: 0.0737 max mem: 9377 +Train: [95] [5300/6250] eta: 0:02:21 lr: 0.000001 grad: 0.1979 (0.2518) loss: 0.6591 (0.6580) time: 0.1712 data: 0.0802 max mem: 9377 +Train: [95] [5400/6250] eta: 0:02:06 lr: 0.000001 grad: 0.1886 (0.2513) loss: 0.6711 (0.6583) time: 0.1426 data: 0.0614 max mem: 9377 +Train: [95] [5500/6250] eta: 0:01:51 lr: 0.000001 grad: 0.1942 (0.2512) loss: 0.6620 (0.6585) time: 0.1458 data: 0.0670 max mem: 9377 +Train: [95] [5600/6250] eta: 0:01:37 lr: 0.000001 grad: 0.1955 (0.2509) loss: 0.6663 (0.6587) time: 0.1556 data: 0.0702 max mem: 9377 +Train: [95] [5700/6250] eta: 0:01:22 lr: 0.000001 grad: 0.1906 (0.2505) loss: 0.6644 (0.6589) time: 0.1678 data: 0.0754 max mem: 9377 +Train: [95] [5800/6250] eta: 0:01:07 lr: 0.000001 grad: 0.1995 (0.2511) loss: 0.6633 (0.6590) time: 0.1406 data: 0.0539 max mem: 9377 +Train: [95] [5900/6250] eta: 0:00:52 lr: 0.000001 grad: 0.1919 (0.2506) loss: 0.6608 (0.6591) time: 0.1338 data: 0.0412 max mem: 9377 +Train: [95] [6000/6250] eta: 0:00:37 lr: 0.000001 grad: 0.2010 (0.2505) loss: 0.6692 (0.6592) time: 0.1596 data: 0.0745 max mem: 9377 +Train: [95] [6100/6250] eta: 0:00:22 lr: 0.000001 grad: 0.1988 (0.2500) loss: 0.6571 (0.6594) time: 0.1208 data: 0.0330 max mem: 9377 +Train: [95] [6200/6250] eta: 0:00:07 lr: 0.000001 grad: 0.1934 (0.2499) loss: 0.6659 (0.6594) time: 0.1406 data: 0.0475 max mem: 9377 +Train: [95] [6249/6250] eta: 0:00:00 lr: 0.000001 grad: 0.1925 (0.2497) loss: 0.6519 (0.6594) time: 0.1477 data: 0.0487 max mem: 9377 +Train: [95] Total time: 0:15:37 (0.1500 s / it) +Averaged stats: lr: 0.000001 grad: 0.1925 (0.2497) loss: 0.6519 (0.6594) +Eval (hcp-train-subset): [95] [ 0/62] eta: 0:06:10 loss: 0.9021 (0.9021) time: 5.9778 data: 5.9466 max mem: 9377 +Eval (hcp-train-subset): [95] [61/62] eta: 0:00:00 loss: 0.9145 (0.9141) time: 0.1292 data: 0.1043 max mem: 9377 +Eval (hcp-train-subset): [95] Total time: 0:00:14 (0.2345 s / it) +Averaged stats (hcp-train-subset): loss: 0.9145 (0.9141) +Eval (hcp-val): [95] [ 0/62] eta: 0:05:41 loss: 0.9134 (0.9134) time: 5.5066 data: 5.4579 max mem: 9377 +Eval (hcp-val): [95] [61/62] eta: 0:00:00 loss: 0.9111 (0.9130) time: 0.1348 data: 0.1094 max mem: 9377 +Eval (hcp-val): [95] Total time: 0:00:14 (0.2294 s / it) +Averaged stats (hcp-val): loss: 0.9111 (0.9130) +Eval (nsd-val): [95] [ 0/62] eta: 0:06:39 loss: 0.9182 (0.9182) time: 6.4359 data: 6.4040 max mem: 9377 +Eval (nsd-val): [95] [61/62] eta: 0:00:00 loss: 0.9159 (0.9182) time: 0.1273 data: 0.1016 max mem: 9377 +Eval (nsd-val): [95] Total time: 0:00:14 (0.2290 s / it) +Averaged stats (nsd-val): loss: 0.9159 (0.9182) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [96] [ 0/6250] eta: 10:18:38 lr: 0.000001 grad: 0.3734 (0.3734) loss: 0.7360 (0.7360) time: 5.9390 data: 5.7112 max mem: 9377 +Train: [96] [ 100/6250] eta: 0:21:53 lr: 0.000001 grad: 0.2026 (0.2553) loss: 0.6664 (0.6690) time: 0.1540 data: 0.0494 max mem: 9377 +Train: [96] [ 200/6250] eta: 0:18:40 lr: 0.000001 grad: 0.2048 (0.2404) loss: 0.6545 (0.6659) time: 0.1619 data: 0.0582 max mem: 9377 +Train: [96] [ 300/6250] eta: 0:17:17 lr: 0.000001 grad: 0.2094 (0.2531) loss: 0.6553 (0.6633) time: 0.1663 data: 0.0587 max mem: 9377 +Train: [96] [ 400/6250] eta: 0:16:23 lr: 0.000001 grad: 0.1932 (0.2489) loss: 0.6767 (0.6650) time: 0.1361 data: 0.0373 max mem: 9377 +Train: [96] [ 500/6250] eta: 0:15:47 lr: 0.000001 grad: 0.1965 (0.2507) loss: 0.6699 (0.6668) time: 0.1479 data: 0.0670 max mem: 9377 +Train: [96] [ 600/6250] eta: 0:15:13 lr: 0.000001 grad: 0.1939 (0.2469) loss: 0.6712 (0.6676) time: 0.1312 data: 0.0370 max mem: 9377 +Train: [96] [ 700/6250] eta: 0:14:49 lr: 0.000001 grad: 0.1980 (0.2484) loss: 0.6667 (0.6673) time: 0.1287 data: 0.0449 max mem: 9377 +Train: [96] [ 800/6250] eta: 0:14:24 lr: 0.000001 grad: 0.2057 (0.2451) loss: 0.6691 (0.6668) time: 0.1509 data: 0.0635 max mem: 9377 +Train: [96] [ 900/6250] eta: 0:14:04 lr: 0.000001 grad: 0.2045 (0.2488) loss: 0.6481 (0.6660) time: 0.1698 data: 0.0703 max mem: 9377 +Train: [96] [1000/6250] eta: 0:13:47 lr: 0.000001 grad: 0.1938 (0.2510) loss: 0.6505 (0.6655) time: 0.1770 data: 0.0810 max mem: 9377 +Train: [96] [1100/6250] eta: 0:13:29 lr: 0.000000 grad: 0.1975 (0.2553) loss: 0.6608 (0.6648) time: 0.1632 data: 0.0737 max mem: 9377 +Train: [96] [1200/6250] eta: 0:13:13 lr: 0.000000 grad: 0.1951 (0.2554) loss: 0.6583 (0.6641) time: 0.1551 data: 0.0666 max mem: 9377 +Train: [96] [1300/6250] eta: 0:12:57 lr: 0.000000 grad: 0.1983 (0.2578) loss: 0.6474 (0.6633) time: 0.1797 data: 0.0911 max mem: 9377 +Train: [96] [1400/6250] eta: 0:12:41 lr: 0.000000 grad: 0.1985 (0.2561) loss: 0.6639 (0.6630) time: 0.1525 data: 0.0708 max mem: 9377 +Train: [96] [1500/6250] eta: 0:12:26 lr: 0.000000 grad: 0.1888 (0.2555) loss: 0.6592 (0.6630) time: 0.1604 data: 0.0707 max mem: 9377 +Train: [96] [1600/6250] eta: 0:12:07 lr: 0.000000 grad: 0.1917 (0.2562) loss: 0.6689 (0.6628) time: 0.1527 data: 0.0683 max mem: 9377 +Train: [96] [1700/6250] eta: 0:11:47 lr: 0.000000 grad: 0.1888 (0.2547) loss: 0.6610 (0.6627) time: 0.1363 data: 0.0463 max mem: 9377 +Train: [96] [1800/6250] eta: 0:11:28 lr: 0.000000 grad: 0.1916 (0.2558) loss: 0.6573 (0.6626) time: 0.1465 data: 0.0655 max mem: 9377 +Train: [96] [1900/6250] eta: 0:11:11 lr: 0.000000 grad: 0.1920 (0.2548) loss: 0.6758 (0.6625) time: 0.1491 data: 0.0677 max mem: 9377 +Train: [96] [2000/6250] eta: 0:10:54 lr: 0.000000 grad: 0.1924 (0.2553) loss: 0.6587 (0.6625) time: 0.1318 data: 0.0415 max mem: 9377 +Train: [96] [2100/6250] eta: 0:10:35 lr: 0.000000 grad: 0.1903 (0.2534) loss: 0.6676 (0.6627) time: 0.1581 data: 0.0665 max mem: 9377 +Train: [96] [2200/6250] eta: 0:10:17 lr: 0.000000 grad: 0.1930 (0.2525) loss: 0.6712 (0.6625) time: 0.1291 data: 0.0393 max mem: 9377 +Train: [96] [2300/6250] eta: 0:09:59 lr: 0.000000 grad: 0.2013 (0.2511) loss: 0.6839 (0.6626) time: 0.1400 data: 0.0509 max mem: 9377 +Train: [96] [2400/6250] eta: 0:09:42 lr: 0.000000 grad: 0.1898 (0.2504) loss: 0.6640 (0.6627) time: 0.1286 data: 0.0462 max mem: 9377 +Train: [96] [2500/6250] eta: 0:09:25 lr: 0.000000 grad: 0.1919 (0.2503) loss: 0.6726 (0.6629) time: 0.1462 data: 0.0621 max mem: 9377 +Train: [96] [2600/6250] eta: 0:09:07 lr: 0.000000 grad: 0.1909 (0.2516) loss: 0.6648 (0.6630) time: 0.1462 data: 0.0557 max mem: 9377 +Train: [96] [2700/6250] eta: 0:08:51 lr: 0.000000 grad: 0.1985 (0.2520) loss: 0.6709 (0.6632) time: 0.1309 data: 0.0347 max mem: 9377 +Train: [96] [2800/6250] eta: 0:08:35 lr: 0.000000 grad: 0.1984 (0.2528) loss: 0.6607 (0.6633) time: 0.1399 data: 0.0490 max mem: 9377 +Train: [96] [2900/6250] eta: 0:08:18 lr: 0.000000 grad: 0.1939 (0.2527) loss: 0.6642 (0.6635) time: 0.1444 data: 0.0478 max mem: 9377 +Train: [96] [3000/6250] eta: 0:08:04 lr: 0.000000 grad: 0.1957 (0.2544) loss: 0.6538 (0.6635) time: 0.1417 data: 0.0535 max mem: 9377 +Train: [96] [3100/6250] eta: 0:07:50 lr: 0.000000 grad: 0.1886 (0.2561) loss: 0.6607 (0.6637) time: 0.1394 data: 0.0536 max mem: 9377 +Train: [96] [3200/6250] eta: 0:07:35 lr: 0.000000 grad: 0.1966 (0.2563) loss: 0.6414 (0.6634) time: 0.1459 data: 0.0604 max mem: 9377 +Train: [96] [3300/6250] eta: 0:07:21 lr: 0.000000 grad: 0.1932 (0.2561) loss: 0.6588 (0.6634) time: 0.1700 data: 0.0813 max mem: 9377 +Train: [96] [3400/6250] eta: 0:07:06 lr: 0.000000 grad: 0.1870 (0.2559) loss: 0.6718 (0.6633) time: 0.1429 data: 0.0469 max mem: 9377 +Train: [96] [3500/6250] eta: 0:06:51 lr: 0.000000 grad: 0.1944 (0.2560) loss: 0.6679 (0.6632) time: 0.1558 data: 0.0712 max mem: 9377 +Train: [96] [3600/6250] eta: 0:06:36 lr: 0.000000 grad: 0.1947 (0.2560) loss: 0.6535 (0.6632) time: 0.1372 data: 0.0501 max mem: 9377 +Train: [96] [3700/6250] eta: 0:06:20 lr: 0.000000 grad: 0.1917 (0.2564) loss: 0.6534 (0.6630) time: 0.1435 data: 0.0553 max mem: 9377 +Train: [96] [3800/6250] eta: 0:06:05 lr: 0.000000 grad: 0.1937 (0.2567) loss: 0.6560 (0.6629) time: 0.1377 data: 0.0548 max mem: 9377 +Train: [96] [3900/6250] eta: 0:05:50 lr: 0.000000 grad: 0.1892 (0.2565) loss: 0.6611 (0.6630) time: 0.1374 data: 0.0487 max mem: 9377 +Train: [96] [4000/6250] eta: 0:05:34 lr: 0.000000 grad: 0.1899 (0.2556) loss: 0.6602 (0.6630) time: 0.1240 data: 0.0342 max mem: 9377 +Train: [96] [4100/6250] eta: 0:05:18 lr: 0.000000 grad: 0.1929 (0.2557) loss: 0.6550 (0.6630) time: 0.1272 data: 0.0337 max mem: 9377 +Train: [96] [4200/6250] eta: 0:05:03 lr: 0.000000 grad: 0.1923 (0.2548) loss: 0.6475 (0.6630) time: 0.1610 data: 0.0712 max mem: 9377 +Train: [96] [4300/6250] eta: 0:04:48 lr: 0.000000 grad: 0.1905 (0.2548) loss: 0.6688 (0.6630) time: 0.1505 data: 0.0628 max mem: 9377 +Train: [96] [4400/6250] eta: 0:04:32 lr: 0.000000 grad: 0.1911 (0.2545) loss: 0.6614 (0.6630) time: 0.1156 data: 0.0317 max mem: 9377 +Train: [96] [4500/6250] eta: 0:04:17 lr: 0.000000 grad: 0.1971 (0.2555) loss: 0.6576 (0.6630) time: 0.1232 data: 0.0260 max mem: 9377 +Train: [96] [4600/6250] eta: 0:04:03 lr: 0.000000 grad: 0.1923 (0.2548) loss: 0.6636 (0.6630) time: 0.1578 data: 0.0788 max mem: 9377 +Train: [96] [4700/6250] eta: 0:03:47 lr: 0.000000 grad: 0.1890 (0.2555) loss: 0.6766 (0.6631) time: 0.1370 data: 0.0522 max mem: 9377 +Train: [96] [4800/6250] eta: 0:03:32 lr: 0.000000 grad: 0.1943 (0.2556) loss: 0.6648 (0.6632) time: 0.1278 data: 0.0436 max mem: 9377 +Train: [96] [4900/6250] eta: 0:03:18 lr: 0.000000 grad: 0.2031 (0.2554) loss: 0.6617 (0.6634) time: 0.1435 data: 0.0555 max mem: 9377 +Train: [96] [5000/6250] eta: 0:03:03 lr: 0.000000 grad: 0.1951 (0.2545) loss: 0.6709 (0.6635) time: 0.1602 data: 0.0726 max mem: 9377 +Train: [96] [5100/6250] eta: 0:02:48 lr: 0.000000 grad: 0.1909 (0.2541) loss: 0.6596 (0.6636) time: 0.1409 data: 0.0547 max mem: 9377 +Train: [96] [5200/6250] eta: 0:02:33 lr: 0.000000 grad: 0.1925 (0.2534) loss: 0.6535 (0.6636) time: 0.1667 data: 0.0866 max mem: 9377 +Train: [96] [5300/6250] eta: 0:02:19 lr: 0.000000 grad: 0.1915 (0.2530) loss: 0.6740 (0.6637) time: 0.1408 data: 0.0638 max mem: 9377 +Train: [96] [5400/6250] eta: 0:02:04 lr: 0.000000 grad: 0.1949 (0.2530) loss: 0.6556 (0.6637) time: 0.1352 data: 0.0577 max mem: 9377 +Train: [96] [5500/6250] eta: 0:01:50 lr: 0.000000 grad: 0.1914 (0.2529) loss: 0.6682 (0.6638) time: 0.1280 data: 0.0449 max mem: 9377 +Train: [96] [5600/6250] eta: 0:01:35 lr: 0.000000 grad: 0.1909 (0.2526) loss: 0.6632 (0.6637) time: 0.1467 data: 0.0631 max mem: 9377 +Train: [96] [5700/6250] eta: 0:01:20 lr: 0.000000 grad: 0.1935 (0.2525) loss: 0.6561 (0.6636) time: 0.1635 data: 0.0709 max mem: 9377 +Train: [96] [5800/6250] eta: 0:01:06 lr: 0.000000 grad: 0.1938 (0.2530) loss: 0.6601 (0.6635) time: 0.1551 data: 0.0660 max mem: 9377 +Train: [96] [5900/6250] eta: 0:00:51 lr: 0.000000 grad: 0.1906 (0.2524) loss: 0.6625 (0.6634) time: 0.1304 data: 0.0423 max mem: 9377 +Train: [96] [6000/6250] eta: 0:00:36 lr: 0.000000 grad: 0.1964 (0.2528) loss: 0.6573 (0.6632) time: 0.1658 data: 0.0722 max mem: 9377 +Train: [96] [6100/6250] eta: 0:00:22 lr: 0.000000 grad: 0.1969 (0.2531) loss: 0.6576 (0.6631) time: 0.1570 data: 0.0742 max mem: 9377 +Train: [96] [6200/6250] eta: 0:00:07 lr: 0.000000 grad: 0.2025 (0.2529) loss: 0.6498 (0.6629) time: 0.1534 data: 0.0621 max mem: 9377 +Train: [96] [6249/6250] eta: 0:00:00 lr: 0.000000 grad: 0.1955 (0.2536) loss: 0.6464 (0.6629) time: 0.1732 data: 0.0916 max mem: 9377 +Train: [96] Total time: 0:15:24 (0.1480 s / it) +Averaged stats: lr: 0.000000 grad: 0.1955 (0.2536) loss: 0.6464 (0.6629) +Eval (hcp-train-subset): [96] [ 0/62] eta: 0:06:19 loss: 0.9033 (0.9033) time: 6.1141 data: 6.0837 max mem: 9377 +Eval (hcp-train-subset): [96] [61/62] eta: 0:00:00 loss: 0.9114 (0.9137) time: 0.1521 data: 0.1254 max mem: 9377 +Eval (hcp-train-subset): [96] Total time: 0:00:14 (0.2332 s / it) +Averaged stats (hcp-train-subset): loss: 0.9114 (0.9137) +Eval (hcp-val): [96] [ 0/62] eta: 0:04:19 loss: 0.9049 (0.9049) time: 4.1888 data: 4.1177 max mem: 9377 +Eval (hcp-val): [96] [61/62] eta: 0:00:00 loss: 0.9150 (0.9132) time: 0.1375 data: 0.1121 max mem: 9377 +Eval (hcp-val): [96] Total time: 0:00:14 (0.2325 s / it) +Averaged stats (hcp-val): loss: 0.9150 (0.9132) +Eval (nsd-val): [96] [ 0/62] eta: 0:06:14 loss: 0.9144 (0.9144) time: 6.0342 data: 6.0020 max mem: 9377 +Eval (nsd-val): [96] [61/62] eta: 0:00:00 loss: 0.9158 (0.9192) time: 0.1093 data: 0.0838 max mem: 9377 +Eval (nsd-val): [96] Total time: 0:00:14 (0.2281 s / it) +Averaged stats (nsd-val): loss: 0.9158 (0.9192) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [97] [ 0/6250] eta: 9:24:34 lr: 0.000000 grad: 0.2306 (0.2306) loss: 0.6512 (0.6512) time: 5.4200 data: 5.0304 max mem: 9377 +Train: [97] [ 100/6250] eta: 0:21:14 lr: 0.000000 grad: 0.1993 (0.2714) loss: 0.6410 (0.6668) time: 0.1479 data: 0.0403 max mem: 9377 +Train: [97] [ 200/6250] eta: 0:18:10 lr: 0.000000 grad: 0.1931 (0.2741) loss: 0.6745 (0.6691) time: 0.1620 data: 0.0571 max mem: 9377 +Train: [97] [ 300/6250] eta: 0:16:51 lr: 0.000000 grad: 0.1959 (0.2608) loss: 0.6608 (0.6678) time: 0.1587 data: 0.0528 max mem: 9377 +Train: [97] [ 400/6250] eta: 0:16:10 lr: 0.000000 grad: 0.1948 (0.2622) loss: 0.6648 (0.6692) time: 0.1308 data: 0.0389 max mem: 9377 +Train: [97] [ 500/6250] eta: 0:15:38 lr: 0.000000 grad: 0.1957 (0.2663) loss: 0.6618 (0.6697) time: 0.1517 data: 0.0514 max mem: 9377 +Train: [97] [ 600/6250] eta: 0:15:12 lr: 0.000000 grad: 0.1944 (0.2621) loss: 0.6718 (0.6709) time: 0.1851 data: 0.0950 max mem: 9377 +Train: [97] [ 700/6250] eta: 0:14:41 lr: 0.000000 grad: 0.1991 (0.2610) loss: 0.6606 (0.6709) time: 0.1424 data: 0.0487 max mem: 9377 +Train: [97] [ 800/6250] eta: 0:14:20 lr: 0.000000 grad: 0.1939 (0.2571) loss: 0.6635 (0.6708) time: 0.1576 data: 0.0606 max mem: 9377 +Train: [97] [ 900/6250] eta: 0:13:56 lr: 0.000000 grad: 0.1917 (0.2555) loss: 0.6871 (0.6708) time: 0.1274 data: 0.0263 max mem: 9377 +Train: [97] [1000/6250] eta: 0:13:36 lr: 0.000000 grad: 0.1966 (0.2665) loss: 0.6625 (0.6701) time: 0.1317 data: 0.0394 max mem: 9377 +Train: [97] [1100/6250] eta: 0:13:18 lr: 0.000000 grad: 0.1912 (0.2664) loss: 0.6621 (0.6693) time: 0.1386 data: 0.0476 max mem: 9377 +Train: [97] [1200/6250] eta: 0:13:08 lr: 0.000000 grad: 0.1949 (0.2652) loss: 0.6645 (0.6686) time: 0.1709 data: 0.0731 max mem: 9377 +Train: [97] [1300/6250] eta: 0:12:52 lr: 0.000000 grad: 0.1883 (0.2641) loss: 0.6689 (0.6679) time: 0.1466 data: 0.0633 max mem: 9377 +Train: [97] [1400/6250] eta: 0:12:36 lr: 0.000000 grad: 0.1911 (0.2655) loss: 0.6637 (0.6673) time: 0.1546 data: 0.0583 max mem: 9377 +Train: [97] [1500/6250] eta: 0:12:21 lr: 0.000000 grad: 0.1976 (0.2631) loss: 0.6701 (0.6669) time: 0.1563 data: 0.0632 max mem: 9377 +Train: [97] [1600/6250] eta: 0:12:02 lr: 0.000000 grad: 0.1913 (0.2627) loss: 0.6606 (0.6663) time: 0.1476 data: 0.0577 max mem: 9377 +Train: [97] [1700/6250] eta: 0:11:44 lr: 0.000000 grad: 0.1983 (0.2607) loss: 0.6443 (0.6656) time: 0.1544 data: 0.0704 max mem: 9377 +Train: [97] [1800/6250] eta: 0:11:27 lr: 0.000000 grad: 0.2004 (0.2600) loss: 0.6393 (0.6650) time: 0.1621 data: 0.0738 max mem: 9377 +Train: [97] [1900/6250] eta: 0:11:09 lr: 0.000000 grad: 0.1970 (0.2598) loss: 0.6381 (0.6642) time: 0.1517 data: 0.0620 max mem: 9377 +Train: [97] [2000/6250] eta: 0:10:50 lr: 0.000000 grad: 0.1946 (0.2580) loss: 0.6437 (0.6634) time: 0.1504 data: 0.0667 max mem: 9377 +Train: [97] [2100/6250] eta: 0:10:34 lr: 0.000000 grad: 0.1949 (0.2565) loss: 0.6448 (0.6629) time: 0.1583 data: 0.0727 max mem: 9377 +Train: [97] [2200/6250] eta: 0:10:16 lr: 0.000000 grad: 0.1917 (0.2549) loss: 0.6662 (0.6626) time: 0.1423 data: 0.0555 max mem: 9377 +Train: [97] [2300/6250] eta: 0:09:59 lr: 0.000000 grad: 0.1897 (0.2534) loss: 0.6627 (0.6625) time: 0.1169 data: 0.0220 max mem: 9377 +Train: [97] [2400/6250] eta: 0:09:42 lr: 0.000000 grad: 0.1916 (0.2534) loss: 0.6597 (0.6623) time: 0.1290 data: 0.0464 max mem: 9377 +Train: [97] [2500/6250] eta: 0:09:25 lr: 0.000000 grad: 0.1932 (0.2538) loss: 0.6689 (0.6622) time: 0.1472 data: 0.0640 max mem: 9377 +Train: [97] [2600/6250] eta: 0:09:07 lr: 0.000000 grad: 0.1862 (0.2541) loss: 0.6626 (0.6622) time: 0.1335 data: 0.0441 max mem: 9377 +Train: [97] [2700/6250] eta: 0:08:51 lr: 0.000000 grad: 0.1909 (0.2535) loss: 0.6809 (0.6622) time: 0.1561 data: 0.0698 max mem: 9377 +Train: [97] [2800/6250] eta: 0:08:35 lr: 0.000000 grad: 0.1941 (0.2534) loss: 0.6625 (0.6622) time: 0.1415 data: 0.0550 max mem: 9377 +Train: [97] [2900/6250] eta: 0:08:19 lr: 0.000000 grad: 0.1889 (0.2529) loss: 0.6591 (0.6621) time: 0.1480 data: 0.0619 max mem: 9377 +Train: [97] [3000/6250] eta: 0:08:05 lr: 0.000000 grad: 0.1939 (0.2530) loss: 0.6698 (0.6622) time: 0.1557 data: 0.0751 max mem: 9377 +Train: [97] [3100/6250] eta: 0:07:51 lr: 0.000000 grad: 0.1928 (0.2536) loss: 0.6405 (0.6622) time: 0.1639 data: 0.0806 max mem: 9377 +Train: [97] [3200/6250] eta: 0:07:36 lr: 0.000000 grad: 0.1937 (0.2539) loss: 0.6586 (0.6620) time: 0.1499 data: 0.0631 max mem: 9377 +Train: [97] [3300/6250] eta: 0:07:21 lr: 0.000000 grad: 0.1925 (0.2549) loss: 0.6489 (0.6619) time: 0.1437 data: 0.0561 max mem: 9377 +Train: [97] [3400/6250] eta: 0:07:06 lr: 0.000000 grad: 0.1920 (0.2544) loss: 0.6661 (0.6619) time: 0.1521 data: 0.0622 max mem: 9377 +Train: [97] [3500/6250] eta: 0:06:51 lr: 0.000000 grad: 0.1993 (0.2554) loss: 0.6528 (0.6618) time: 0.1527 data: 0.0652 max mem: 9377 +Train: [97] [3600/6250] eta: 0:06:36 lr: 0.000000 grad: 0.1941 (0.2559) loss: 0.6638 (0.6616) time: 0.1447 data: 0.0632 max mem: 9377 +Train: [97] [3700/6250] eta: 0:06:20 lr: 0.000000 grad: 0.1924 (0.2553) loss: 0.6556 (0.6616) time: 0.1278 data: 0.0421 max mem: 9377 +Train: [97] [3800/6250] eta: 0:06:05 lr: 0.000000 grad: 0.1911 (0.2550) loss: 0.6706 (0.6616) time: 0.1569 data: 0.0721 max mem: 9377 +Train: [97] [3900/6250] eta: 0:05:50 lr: 0.000000 grad: 0.1892 (0.2554) loss: 0.6592 (0.6616) time: 0.1487 data: 0.0665 max mem: 9377 +Train: [97] [4000/6250] eta: 0:05:35 lr: 0.000000 grad: 0.1878 (0.2555) loss: 0.6682 (0.6617) time: 0.1436 data: 0.0510 max mem: 9377 +Train: [97] [4100/6250] eta: 0:05:19 lr: 0.000000 grad: 0.1894 (0.2562) loss: 0.6650 (0.6617) time: 0.1374 data: 0.0521 max mem: 9377 +Train: [97] [4200/6250] eta: 0:05:04 lr: 0.000000 grad: 0.1918 (0.2560) loss: 0.6672 (0.6618) time: 0.1288 data: 0.0405 max mem: 9377 +Train: [97] [4300/6250] eta: 0:04:49 lr: 0.000000 grad: 0.1887 (0.2557) loss: 0.6548 (0.6618) time: 0.1543 data: 0.0742 max mem: 9377 +Train: [97] [4400/6250] eta: 0:04:34 lr: 0.000000 grad: 0.1957 (0.2557) loss: 0.6509 (0.6617) time: 0.1665 data: 0.0779 max mem: 9377 +Train: [97] [4500/6250] eta: 0:04:19 lr: 0.000000 grad: 0.1965 (0.2554) loss: 0.6612 (0.6617) time: 0.1498 data: 0.0643 max mem: 9377 +Train: [97] [4600/6250] eta: 0:04:04 lr: 0.000000 grad: 0.1932 (0.2558) loss: 0.6581 (0.6617) time: 0.1357 data: 0.0480 max mem: 9377 +Train: [97] [4700/6250] eta: 0:03:50 lr: 0.000000 grad: 0.1955 (0.2554) loss: 0.6543 (0.6617) time: 0.1283 data: 0.0378 max mem: 9377 +Train: [97] [4800/6250] eta: 0:03:35 lr: 0.000000 grad: 0.1932 (0.2547) loss: 0.6536 (0.6617) time: 0.1413 data: 0.0546 max mem: 9377 +Train: [97] [4900/6250] eta: 0:03:20 lr: 0.000000 grad: 0.1913 (0.2541) loss: 0.6566 (0.6616) time: 0.1662 data: 0.0758 max mem: 9377 +Train: [97] [5000/6250] eta: 0:03:05 lr: 0.000000 grad: 0.1910 (0.2533) loss: 0.6617 (0.6616) time: 0.1439 data: 0.0529 max mem: 9377 +Train: [97] [5100/6250] eta: 0:02:50 lr: 0.000000 grad: 0.1953 (0.2542) loss: 0.6573 (0.6615) time: 0.1270 data: 0.0407 max mem: 9377 +Train: [97] [5200/6250] eta: 0:02:36 lr: 0.000000 grad: 0.1941 (0.2538) loss: 0.6438 (0.6614) time: 0.1494 data: 0.0643 max mem: 9377 +Train: [97] [5300/6250] eta: 0:02:21 lr: 0.000000 grad: 0.1899 (0.2539) loss: 0.6561 (0.6612) time: 0.1653 data: 0.0811 max mem: 9377 +Train: [97] [5400/6250] eta: 0:02:06 lr: 0.000000 grad: 0.2009 (0.2536) loss: 0.6547 (0.6611) time: 0.1708 data: 0.0877 max mem: 9377 +Train: [97] [5500/6250] eta: 0:01:52 lr: 0.000000 grad: 0.2057 (0.2538) loss: 0.6480 (0.6609) time: 0.1835 data: 0.1052 max mem: 9377 +Train: [97] [5600/6250] eta: 0:01:37 lr: 0.000000 grad: 0.1938 (0.2535) loss: 0.6561 (0.6608) time: 0.1273 data: 0.0421 max mem: 9377 +Train: [97] [5700/6250] eta: 0:01:22 lr: 0.000000 grad: 0.1999 (0.2537) loss: 0.6444 (0.6606) time: 0.1301 data: 0.0432 max mem: 9377 +Train: [97] [5800/6250] eta: 0:01:07 lr: 0.000000 grad: 0.1997 (0.2543) loss: 0.6465 (0.6605) time: 0.1489 data: 0.0726 max mem: 9377 +Train: [97] [5900/6250] eta: 0:00:52 lr: 0.000000 grad: 0.2019 (0.2540) loss: 0.6570 (0.6604) time: 0.1718 data: 0.0825 max mem: 9377 +Train: [97] [6000/6250] eta: 0:00:37 lr: 0.000000 grad: 0.1958 (0.2537) loss: 0.6577 (0.6604) time: 0.1509 data: 0.0568 max mem: 9377 +Train: [97] [6100/6250] eta: 0:00:22 lr: 0.000000 grad: 0.1945 (0.2533) loss: 0.6596 (0.6605) time: 0.1407 data: 0.0466 max mem: 9377 +Train: [97] [6200/6250] eta: 0:00:07 lr: 0.000000 grad: 0.1899 (0.2533) loss: 0.6561 (0.6604) time: 0.1417 data: 0.0524 max mem: 9377 +Train: [97] [6249/6250] eta: 0:00:00 lr: 0.000000 grad: 0.1941 (0.2533) loss: 0.6618 (0.6604) time: 0.1375 data: 0.0467 max mem: 9377 +Train: [97] Total time: 0:15:41 (0.1506 s / it) +Averaged stats: lr: 0.000000 grad: 0.1941 (0.2533) loss: 0.6618 (0.6604) +Eval (hcp-train-subset): [97] [ 0/62] eta: 0:05:58 loss: 0.9012 (0.9012) time: 5.7879 data: 5.7435 max mem: 9377 +Eval (hcp-train-subset): [97] [61/62] eta: 0:00:00 loss: 0.9144 (0.9136) time: 0.1462 data: 0.1209 max mem: 9377 +Eval (hcp-train-subset): [97] Total time: 0:00:14 (0.2387 s / it) +Averaged stats (hcp-train-subset): loss: 0.9144 (0.9136) +Eval (hcp-val): [97] [ 0/62] eta: 0:04:35 loss: 0.9086 (0.9086) time: 4.4364 data: 4.3417 max mem: 9377 +Eval (hcp-val): [97] [61/62] eta: 0:00:00 loss: 0.9119 (0.9121) time: 0.1226 data: 0.0975 max mem: 9377 +Eval (hcp-val): [97] Total time: 0:00:14 (0.2357 s / it) +Averaged stats (hcp-val): loss: 0.9119 (0.9121) +Eval (nsd-val): [97] [ 0/62] eta: 0:05:32 loss: 0.9098 (0.9098) time: 5.3678 data: 5.3362 max mem: 9377 +Eval (nsd-val): [97] [61/62] eta: 0:00:00 loss: 0.9153 (0.9197) time: 0.1373 data: 0.1105 max mem: 9377 +Eval (nsd-val): [97] Total time: 0:00:14 (0.2316 s / it) +Averaged stats (nsd-val): loss: 0.9153 (0.9197) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [98] [ 0/6250] eta: 12:18:07 lr: 0.000000 grad: 0.2433 (0.2433) loss: 0.6681 (0.6681) time: 7.0860 data: 6.9874 max mem: 9377 +Train: [98] [ 100/6250] eta: 0:21:50 lr: 0.000000 grad: 0.1971 (0.2756) loss: 0.6785 (0.6889) time: 0.1427 data: 0.0486 max mem: 9377 +Train: [98] [ 200/6250] eta: 0:18:30 lr: 0.000000 grad: 0.2039 (0.2589) loss: 0.6659 (0.6761) time: 0.1590 data: 0.0465 max mem: 9377 +Train: [98] [ 300/6250] eta: 0:17:07 lr: 0.000000 grad: 0.2008 (0.2650) loss: 0.6584 (0.6696) time: 0.1486 data: 0.0467 max mem: 9377 +Train: [98] [ 400/6250] eta: 0:16:10 lr: 0.000000 grad: 0.1949 (0.2660) loss: 0.6531 (0.6650) time: 0.1295 data: 0.0311 max mem: 9377 +Train: [98] [ 500/6250] eta: 0:15:41 lr: 0.000000 grad: 0.1945 (0.2660) loss: 0.6591 (0.6625) time: 0.1467 data: 0.0510 max mem: 9377 +Train: [98] [ 600/6250] eta: 0:15:12 lr: 0.000000 grad: 0.1942 (0.2583) loss: 0.6597 (0.6615) time: 0.1323 data: 0.0386 max mem: 9377 +Train: [98] [ 700/6250] eta: 0:14:50 lr: 0.000000 grad: 0.1971 (0.2566) loss: 0.6716 (0.6617) time: 0.1503 data: 0.0604 max mem: 9377 +Train: [98] [ 800/6250] eta: 0:14:28 lr: 0.000000 grad: 0.1981 (0.2621) loss: 0.6600 (0.6618) time: 0.1002 data: 0.0102 max mem: 9377 +Train: [98] [ 900/6250] eta: 0:14:04 lr: 0.000000 grad: 0.1930 (0.2590) loss: 0.6690 (0.6617) time: 0.1319 data: 0.0349 max mem: 9377 +Train: [98] [1000/6250] eta: 0:13:44 lr: 0.000000 grad: 0.2004 (0.2571) loss: 0.6662 (0.6620) time: 0.1415 data: 0.0487 max mem: 9377 +Train: [98] [1100/6250] eta: 0:13:28 lr: 0.000000 grad: 0.1951 (0.2548) loss: 0.6675 (0.6624) time: 0.1833 data: 0.0937 max mem: 9377 +Train: [98] [1200/6250] eta: 0:13:14 lr: 0.000000 grad: 0.1918 (0.2546) loss: 0.6721 (0.6626) time: 0.1618 data: 0.0766 max mem: 9377 +Train: [98] [1300/6250] eta: 0:12:58 lr: 0.000000 grad: 0.1948 (0.2542) loss: 0.6714 (0.6628) time: 0.1434 data: 0.0699 max mem: 9377 +Train: [98] [1400/6250] eta: 0:12:37 lr: 0.000000 grad: 0.1965 (0.2527) loss: 0.6700 (0.6631) time: 0.1365 data: 0.0539 max mem: 9377 +Train: [98] [1500/6250] eta: 0:12:24 lr: 0.000000 grad: 0.2023 (0.2512) loss: 0.6552 (0.6631) time: 0.1610 data: 0.0678 max mem: 9377 +Train: [98] [1600/6250] eta: 0:12:06 lr: 0.000000 grad: 0.1896 (0.2521) loss: 0.6434 (0.6628) time: 0.1566 data: 0.0635 max mem: 9377 +Train: [98] [1700/6250] eta: 0:11:48 lr: 0.000000 grad: 0.1945 (0.2537) loss: 0.6593 (0.6625) time: 0.1369 data: 0.0445 max mem: 9377 +Train: [98] [1800/6250] eta: 0:11:31 lr: 0.000000 grad: 0.1931 (0.2543) loss: 0.6635 (0.6627) time: 0.1511 data: 0.0599 max mem: 9377 +Train: [98] [1900/6250] eta: 0:11:12 lr: 0.000000 grad: 0.1920 (0.2552) loss: 0.6626 (0.6629) time: 0.1308 data: 0.0367 max mem: 9377 +Train: [98] [2000/6250] eta: 0:10:57 lr: 0.000000 grad: 0.1904 (0.2545) loss: 0.6738 (0.6631) time: 0.1641 data: 0.0871 max mem: 9377 +Train: [98] [2100/6250] eta: 0:10:39 lr: 0.000000 grad: 0.1939 (0.2533) loss: 0.6778 (0.6631) time: 0.1401 data: 0.0502 max mem: 9377 +Train: [98] [2200/6250] eta: 0:10:21 lr: 0.000000 grad: 0.1884 (0.2528) loss: 0.6739 (0.6633) time: 0.1326 data: 0.0409 max mem: 9377 +Train: [98] [2300/6250] eta: 0:10:05 lr: 0.000000 grad: 0.1922 (0.2531) loss: 0.6755 (0.6634) time: 0.1529 data: 0.0679 max mem: 9377 +Train: [98] [2400/6250] eta: 0:09:48 lr: 0.000000 grad: 0.1954 (0.2521) loss: 0.6382 (0.6633) time: 0.1273 data: 0.0367 max mem: 9377 +Train: [98] [2500/6250] eta: 0:09:32 lr: 0.000000 grad: 0.1935 (0.2515) loss: 0.6679 (0.6634) time: 0.1643 data: 0.0811 max mem: 9377 +Train: [98] [2600/6250] eta: 0:09:15 lr: 0.000000 grad: 0.2127 (0.2523) loss: 0.6576 (0.6636) time: 0.1483 data: 0.0446 max mem: 9377 +Train: [98] [2700/6250] eta: 0:08:59 lr: 0.000000 grad: 0.1916 (0.2524) loss: 0.6639 (0.6638) time: 0.1441 data: 0.0492 max mem: 9377 +Train: [98] [2800/6250] eta: 0:08:42 lr: 0.000000 grad: 0.1898 (0.2530) loss: 0.6730 (0.6639) time: 0.1304 data: 0.0318 max mem: 9377 +Train: [98] [2900/6250] eta: 0:08:25 lr: 0.000000 grad: 0.1997 (0.2527) loss: 0.6665 (0.6640) time: 0.1303 data: 0.0482 max mem: 9377 +Train: [98] [3000/6250] eta: 0:08:10 lr: 0.000000 grad: 0.1919 (0.2536) loss: 0.6590 (0.6640) time: 0.1345 data: 0.0556 max mem: 9377 +Train: [98] [3100/6250] eta: 0:07:56 lr: 0.000000 grad: 0.1875 (0.2531) loss: 0.6812 (0.6641) time: 0.1551 data: 0.0674 max mem: 9377 +Train: [98] [3200/6250] eta: 0:07:41 lr: 0.000000 grad: 0.1938 (0.2532) loss: 0.6636 (0.6640) time: 0.1592 data: 0.0725 max mem: 9377 +Train: [98] [3300/6250] eta: 0:07:27 lr: 0.000000 grad: 0.2013 (0.2531) loss: 0.6690 (0.6640) time: 0.1512 data: 0.0696 max mem: 9377 +Train: [98] [3400/6250] eta: 0:07:12 lr: 0.000000 grad: 0.1986 (0.2534) loss: 0.6430 (0.6640) time: 0.1483 data: 0.0554 max mem: 9377 +Train: [98] [3500/6250] eta: 0:06:57 lr: 0.000000 grad: 0.1943 (0.2543) loss: 0.6775 (0.6639) time: 0.1381 data: 0.0468 max mem: 9377 +Train: [98] [3600/6250] eta: 0:06:41 lr: 0.000000 grad: 0.1886 (0.2532) loss: 0.6750 (0.6639) time: 0.1447 data: 0.0497 max mem: 9377 +Train: [98] [3700/6250] eta: 0:06:26 lr: 0.000000 grad: 0.1926 (0.2529) loss: 0.6572 (0.6639) time: 0.1562 data: 0.0657 max mem: 9377 +Train: [98] [3800/6250] eta: 0:06:10 lr: 0.000000 grad: 0.1898 (0.2526) loss: 0.6686 (0.6639) time: 0.1345 data: 0.0383 max mem: 9377 +Train: [98] [3900/6250] eta: 0:05:55 lr: 0.000000 grad: 0.1914 (0.2526) loss: 0.6648 (0.6639) time: 0.1614 data: 0.0720 max mem: 9377 +Train: [98] [4000/6250] eta: 0:05:39 lr: 0.000000 grad: 0.1919 (0.2529) loss: 0.6602 (0.6638) time: 0.1084 data: 0.0208 max mem: 9377 +Train: [98] [4100/6250] eta: 0:05:24 lr: 0.000000 grad: 0.1930 (0.2525) loss: 0.6613 (0.6637) time: 0.1482 data: 0.0630 max mem: 9377 +Train: [98] [4200/6250] eta: 0:05:08 lr: 0.000000 grad: 0.1960 (0.2523) loss: 0.6696 (0.6636) time: 0.1154 data: 0.0264 max mem: 9377 +Train: [98] [4300/6250] eta: 0:04:52 lr: 0.000000 grad: 0.1884 (0.2522) loss: 0.6704 (0.6636) time: 0.1120 data: 0.0253 max mem: 9377 +Train: [98] [4400/6250] eta: 0:04:37 lr: 0.000000 grad: 0.1976 (0.2526) loss: 0.6528 (0.6636) time: 0.1477 data: 0.0688 max mem: 9377 +Train: [98] [4500/6250] eta: 0:04:22 lr: 0.000000 grad: 0.1904 (0.2523) loss: 0.6672 (0.6635) time: 0.1394 data: 0.0548 max mem: 9377 +Train: [98] [4600/6250] eta: 0:04:06 lr: 0.000000 grad: 0.2270 (0.2520) loss: 0.6741 (0.6634) time: 0.1446 data: 0.0609 max mem: 9377 +Train: [98] [4700/6250] eta: 0:03:51 lr: 0.000000 grad: 0.1945 (0.2517) loss: 0.6720 (0.6634) time: 0.1293 data: 0.0468 max mem: 9377 +Train: [98] [4800/6250] eta: 0:03:36 lr: 0.000000 grad: 0.1938 (0.2517) loss: 0.6668 (0.6634) time: 0.1281 data: 0.0451 max mem: 9377 +Train: [98] [4900/6250] eta: 0:03:21 lr: 0.000000 grad: 0.1959 (0.2515) loss: 0.6647 (0.6634) time: 0.1482 data: 0.0571 max mem: 9377 +Train: [98] [5000/6250] eta: 0:03:06 lr: 0.000000 grad: 0.1932 (0.2514) loss: 0.6644 (0.6635) time: 0.1152 data: 0.0248 max mem: 9377 +Train: [98] [5100/6250] eta: 0:02:51 lr: 0.000000 grad: 0.1944 (0.2523) loss: 0.6738 (0.6635) time: 0.1354 data: 0.0486 max mem: 9377 +Train: [98] [5200/6250] eta: 0:02:36 lr: 0.000000 grad: 0.2012 (0.2530) loss: 0.6623 (0.6635) time: 0.1372 data: 0.0524 max mem: 9377 +Train: [98] [5300/6250] eta: 0:02:21 lr: 0.000000 grad: 0.1949 (0.2526) loss: 0.6642 (0.6636) time: 0.1447 data: 0.0580 max mem: 9377 +Train: [98] [5400/6250] eta: 0:02:06 lr: 0.000000 grad: 0.1936 (0.2524) loss: 0.6602 (0.6635) time: 0.1732 data: 0.0955 max mem: 9377 +Train: [98] [5500/6250] eta: 0:01:51 lr: 0.000000 grad: 0.1928 (0.2521) loss: 0.6505 (0.6634) time: 0.1467 data: 0.0680 max mem: 9377 +Train: [98] [5600/6250] eta: 0:01:37 lr: 0.000000 grad: 0.1972 (0.2527) loss: 0.6552 (0.6632) time: 0.1532 data: 0.0748 max mem: 9377 +Train: [98] [5700/6250] eta: 0:01:22 lr: 0.000000 grad: 0.1893 (0.2527) loss: 0.6634 (0.6631) time: 0.1666 data: 0.0796 max mem: 9377 +Train: [98] [5800/6250] eta: 0:01:07 lr: 0.000000 grad: 0.2174 (0.2530) loss: 0.6540 (0.6630) time: 0.1377 data: 0.0497 max mem: 9377 +Train: [98] [5900/6250] eta: 0:00:52 lr: 0.000000 grad: 0.1911 (0.2528) loss: 0.6553 (0.6629) time: 0.1399 data: 0.0514 max mem: 9377 +Train: [98] [6000/6250] eta: 0:00:37 lr: 0.000000 grad: 0.1921 (0.2527) loss: 0.6538 (0.6628) time: 0.1670 data: 0.0775 max mem: 9377 +Train: [98] [6100/6250] eta: 0:00:22 lr: 0.000000 grad: 0.2006 (0.2526) loss: 0.6563 (0.6627) time: 0.1368 data: 0.0423 max mem: 9377 +Train: [98] [6200/6250] eta: 0:00:07 lr: 0.000000 grad: 0.1983 (0.2526) loss: 0.6782 (0.6626) time: 0.1433 data: 0.0565 max mem: 9377 +Train: [98] [6249/6250] eta: 0:00:00 lr: 0.000000 grad: 0.1968 (0.2527) loss: 0.6543 (0.6625) time: 0.1376 data: 0.0470 max mem: 9377 +Train: [98] Total time: 0:15:40 (0.1505 s / it) +Averaged stats: lr: 0.000000 grad: 0.1968 (0.2527) loss: 0.6543 (0.6625) +Eval (hcp-train-subset): [98] [ 0/62] eta: 0:05:49 loss: 0.9024 (0.9024) time: 5.6362 data: 5.6050 max mem: 9377 +Eval (hcp-train-subset): [98] [61/62] eta: 0:00:00 loss: 0.9130 (0.9132) time: 0.1386 data: 0.1120 max mem: 9377 +Eval (hcp-train-subset): [98] Total time: 0:00:13 (0.2246 s / it) +Averaged stats (hcp-train-subset): loss: 0.9130 (0.9132) +Eval (hcp-val): [98] [ 0/62] eta: 0:05:16 loss: 0.9167 (0.9167) time: 5.1102 data: 5.0185 max mem: 9377 +Eval (hcp-val): [98] [61/62] eta: 0:00:00 loss: 0.9127 (0.9136) time: 0.1363 data: 0.1095 max mem: 9377 +Eval (hcp-val): [98] Total time: 0:00:14 (0.2300 s / it) +Averaged stats (hcp-val): loss: 0.9127 (0.9136) +Eval (nsd-val): [98] [ 0/62] eta: 0:05:06 loss: 0.9267 (0.9267) time: 4.9382 data: 4.8741 max mem: 9377 +Eval (nsd-val): [98] [61/62] eta: 0:00:00 loss: 0.9164 (0.9195) time: 0.1051 data: 0.0781 max mem: 9377 +Eval (nsd-val): [98] Total time: 0:00:15 (0.2526 s / it) +Averaged stats (nsd-val): loss: 0.9164 (0.9195) +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +Train: [99] [ 0/6250] eta: 10:37:31 lr: 0.000000 grad: 0.3824 (0.3824) loss: 0.7641 (0.7641) time: 6.1203 data: 5.8450 max mem: 9377 +Train: [99] [ 100/6250] eta: 0:23:14 lr: 0.000000 grad: 0.1925 (0.2302) loss: 0.6902 (0.6871) time: 0.1593 data: 0.0601 max mem: 9377 +Train: [99] [ 200/6250] eta: 0:19:05 lr: 0.000000 grad: 0.2084 (0.2459) loss: 0.6597 (0.6784) time: 0.1448 data: 0.0431 max mem: 9377 +Train: [99] [ 300/6250] eta: 0:18:41 lr: 0.000000 grad: 0.2011 (0.2685) loss: 0.6416 (0.6718) time: 0.1590 data: 0.0473 max mem: 9377 +Train: [99] [ 400/6250] eta: 0:17:22 lr: 0.000000 grad: 0.1894 (0.2647) loss: 0.6734 (0.6693) time: 0.1334 data: 0.0349 max mem: 9377 +Train: [99] [ 500/6250] eta: 0:16:33 lr: 0.000000 grad: 0.1954 (0.2546) loss: 0.6523 (0.6670) time: 0.1661 data: 0.0767 max mem: 9377 +Train: [99] [ 600/6250] eta: 0:15:56 lr: 0.000000 grad: 0.1899 (0.2521) loss: 0.6678 (0.6665) time: 0.1737 data: 0.0762 max mem: 9377 +Train: [99] [ 700/6250] eta: 0:15:28 lr: 0.000000 grad: 0.1955 (0.2493) loss: 0.6553 (0.6659) time: 0.1189 data: 0.0180 max mem: 9377 +Train: [99] [ 800/6250] eta: 0:14:59 lr: 0.000000 grad: 0.1937 (0.2512) loss: 0.6520 (0.6658) time: 0.1377 data: 0.0333 max mem: 9377 +Train: [99] [ 900/6250] eta: 0:14:33 lr: 0.000000 grad: 0.1935 (0.2538) loss: 0.6428 (0.6646) time: 0.1586 data: 0.0638 max mem: 9377 +Train: [99] [1000/6250] eta: 0:14:11 lr: 0.000000 grad: 0.1912 (0.2521) loss: 0.6560 (0.6638) time: 0.1589 data: 0.0744 max mem: 9377 +Train: [99] [1100/6250] eta: 0:13:54 lr: 0.000000 grad: 0.1995 (0.2504) loss: 0.6566 (0.6634) time: 0.1642 data: 0.0788 max mem: 9377 +Train: [99] [1200/6250] eta: 0:13:33 lr: 0.000000 grad: 0.1955 (0.2477) loss: 0.6465 (0.6628) time: 0.1608 data: 0.0742 max mem: 9377 +Train: [99] [1300/6250] eta: 0:13:15 lr: 0.000000 grad: 0.1946 (0.2480) loss: 0.6565 (0.6628) time: 0.1639 data: 0.0830 max mem: 9377 +Train: [99] [1400/6250] eta: 0:12:54 lr: 0.000000 grad: 0.2036 (0.2490) loss: 0.6622 (0.6624) time: 0.1468 data: 0.0609 max mem: 9377 +Train: [99] [1500/6250] eta: 0:12:39 lr: 0.000000 grad: 0.1906 (0.2475) loss: 0.6751 (0.6625) time: 0.1651 data: 0.0774 max mem: 9377 +Train: [99] [1600/6250] eta: 0:12:21 lr: 0.000000 grad: 0.1976 (0.2499) loss: 0.6541 (0.6623) time: 0.1327 data: 0.0452 max mem: 9377 +Train: [99] [1700/6250] eta: 0:12:05 lr: 0.000000 grad: 0.1939 (0.2490) loss: 0.6556 (0.6617) time: 0.1425 data: 0.0602 max mem: 9377 +Train: [99] [1800/6250] eta: 0:11:43 lr: 0.000000 grad: 0.1940 (0.2500) loss: 0.6597 (0.6613) time: 0.1389 data: 0.0354 max mem: 9377 +Train: [99] [1900/6250] eta: 0:11:25 lr: 0.000000 grad: 0.1920 (0.2508) loss: 0.6584 (0.6610) time: 0.1419 data: 0.0494 max mem: 9377 +Train: [99] [2000/6250] eta: 0:11:06 lr: 0.000000 grad: 0.1985 (0.2498) loss: 0.6725 (0.6610) time: 0.1397 data: 0.0508 max mem: 9377 +Train: [99] [2100/6250] eta: 0:10:48 lr: 0.000000 grad: 0.1972 (0.2505) loss: 0.6637 (0.6610) time: 0.1298 data: 0.0434 max mem: 9377 +Train: [99] [2200/6250] eta: 0:10:32 lr: 0.000000 grad: 0.1930 (0.2504) loss: 0.6664 (0.6612) time: 0.1634 data: 0.0799 max mem: 9377 +Train: [99] [2300/6250] eta: 0:10:13 lr: 0.000000 grad: 0.1884 (0.2494) loss: 0.6613 (0.6615) time: 0.1448 data: 0.0612 max mem: 9377 +Train: [99] [2400/6250] eta: 0:09:55 lr: 0.000000 grad: 0.1902 (0.2491) loss: 0.6648 (0.6616) time: 0.1423 data: 0.0564 max mem: 9377 +Train: [99] [2500/6250] eta: 0:09:38 lr: 0.000000 grad: 0.2005 (0.2484) loss: 0.6728 (0.6619) time: 0.1256 data: 0.0395 max mem: 9377 +Train: [99] [2600/6250] eta: 0:09:21 lr: 0.000000 grad: 0.2064 (0.2485) loss: 0.6637 (0.6623) time: 0.1647 data: 0.0810 max mem: 9377 +Train: [99] [2700/6250] eta: 0:09:04 lr: 0.000000 grad: 0.1900 (0.2472) loss: 0.6729 (0.6626) time: 0.1427 data: 0.0490 max mem: 9377 +Train: [99] [2800/6250] eta: 0:08:47 lr: 0.000000 grad: 0.2022 (0.2473) loss: 0.6602 (0.6627) time: 0.1338 data: 0.0476 max mem: 9377 +Train: [99] [2900/6250] eta: 0:08:30 lr: 0.000000 grad: 0.2005 (0.2477) loss: 0.6686 (0.6628) time: 0.1373 data: 0.0465 max mem: 9377 +Train: [99] [3000/6250] eta: 0:08:15 lr: 0.000000 grad: 0.1924 (0.2468) loss: 0.6673 (0.6630) time: 0.1583 data: 0.0825 max mem: 9377 +Train: [99] [3100/6250] eta: 0:08:01 lr: 0.000000 grad: 0.1932 (0.2463) loss: 0.6686 (0.6630) time: 0.1658 data: 0.0820 max mem: 9377 +Train: [99] [3200/6250] eta: 0:07:46 lr: 0.000000 grad: 0.1927 (0.2468) loss: 0.6612 (0.6631) time: 0.1429 data: 0.0544 max mem: 9377 +Train: [99] [3300/6250] eta: 0:07:31 lr: 0.000000 grad: 0.1915 (0.2469) loss: 0.6735 (0.6633) time: 0.1341 data: 0.0502 max mem: 9377 +Train: [99] [3400/6250] eta: 0:07:16 lr: 0.000000 grad: 0.1957 (0.2464) loss: 0.6559 (0.6633) time: 0.1694 data: 0.0859 max mem: 9377 +Train: [99] [3500/6250] eta: 0:07:01 lr: 0.000000 grad: 0.1964 (0.2462) loss: 0.6516 (0.6632) time: 0.1903 data: 0.1045 max mem: 9377 +Train: [99] [3600/6250] eta: 0:06:45 lr: 0.000000 grad: 0.1893 (0.2457) loss: 0.6619 (0.6630) time: 0.1419 data: 0.0480 max mem: 9377 +Train: [99] [3700/6250] eta: 0:06:29 lr: 0.000000 grad: 0.1901 (0.2452) loss: 0.6588 (0.6630) time: 0.1593 data: 0.0846 max mem: 9377 +Train: [99] [3800/6250] eta: 0:06:14 lr: 0.000000 grad: 0.1929 (0.2450) loss: 0.6602 (0.6630) time: 0.1412 data: 0.0509 max mem: 9377 +Train: [99] [3900/6250] eta: 0:05:57 lr: 0.000000 grad: 0.1917 (0.2449) loss: 0.6614 (0.6630) time: 0.1310 data: 0.0337 max mem: 9377 +Train: [99] [4000/6250] eta: 0:05:42 lr: 0.000000 grad: 0.1924 (0.2447) loss: 0.6533 (0.6630) time: 0.1480 data: 0.0592 max mem: 9377 +Train: [99] [4100/6250] eta: 0:05:26 lr: 0.000000 grad: 0.1954 (0.2449) loss: 0.6461 (0.6628) time: 0.1247 data: 0.0320 max mem: 9377 +Train: [99] [4200/6250] eta: 0:05:10 lr: 0.000000 grad: 0.1975 (0.2449) loss: 0.6551 (0.6627) time: 0.1452 data: 0.0581 max mem: 9377 +Train: [99] [4300/6250] eta: 0:04:54 lr: 0.000000 grad: 0.1981 (0.2451) loss: 0.6466 (0.6625) time: 0.1264 data: 0.0372 max mem: 9377 +Train: [99] [4400/6250] eta: 0:04:39 lr: 0.000000 grad: 0.1949 (0.2458) loss: 0.6582 (0.6625) time: 0.1433 data: 0.0532 max mem: 9377 +Train: [99] [4500/6250] eta: 0:04:23 lr: 0.000000 grad: 0.1908 (0.2456) loss: 0.6591 (0.6625) time: 0.1394 data: 0.0449 max mem: 9377 +Train: [99] [4600/6250] eta: 0:04:07 lr: 0.000000 grad: 0.1914 (0.2459) loss: 0.6630 (0.6627) time: 0.1486 data: 0.0600 max mem: 9377 +Train: [99] [4700/6250] eta: 0:03:52 lr: 0.000000 grad: 0.1863 (0.2459) loss: 0.6677 (0.6628) time: 0.1387 data: 0.0536 max mem: 9377 +Train: [99] [4800/6250] eta: 0:03:37 lr: 0.000000 grad: 0.1905 (0.2461) loss: 0.6620 (0.6629) time: 0.1521 data: 0.0629 max mem: 9377 +Train: [99] [4900/6250] eta: 0:03:21 lr: 0.000000 grad: 0.1941 (0.2463) loss: 0.6657 (0.6630) time: 0.1387 data: 0.0561 max mem: 9377 +Train: [99] [5000/6250] eta: 0:03:06 lr: 0.000000 grad: 0.1893 (0.2466) loss: 0.6661 (0.6631) time: 0.1360 data: 0.0409 max mem: 9377 +Train: [99] [5100/6250] eta: 0:02:51 lr: 0.000000 grad: 0.1941 (0.2466) loss: 0.6504 (0.6631) time: 0.1406 data: 0.0558 max mem: 9377 +Train: [99] [5200/6250] eta: 0:02:36 lr: 0.000000 grad: 0.1947 (0.2465) loss: 0.6602 (0.6630) time: 0.1461 data: 0.0613 max mem: 9377 +Train: [99] [5300/6250] eta: 0:02:21 lr: 0.000000 grad: 0.1964 (0.2462) loss: 0.6611 (0.6631) time: 0.1820 data: 0.0993 max mem: 9377 +Train: [99] [5400/6250] eta: 0:02:07 lr: 0.000000 grad: 0.1960 (0.2456) loss: 0.6631 (0.6631) time: 0.1729 data: 0.0895 max mem: 9377 +Train: [99] [5500/6250] eta: 0:01:52 lr: 0.000000 grad: 0.1930 (0.2455) loss: 0.6455 (0.6630) time: 0.1544 data: 0.0587 max mem: 9377 +Train: [99] [5600/6250] eta: 0:01:37 lr: 0.000000 grad: 0.1951 (0.2456) loss: 0.6404 (0.6629) time: 0.1584 data: 0.0763 max mem: 9377 +Train: [99] [5700/6250] eta: 0:01:22 lr: 0.000000 grad: 0.1989 (0.2465) loss: 0.6567 (0.6627) time: 0.1592 data: 0.0720 max mem: 9377 +Train: [99] [5800/6250] eta: 0:01:07 lr: 0.000000 grad: 0.1943 (0.2462) loss: 0.6487 (0.6626) time: 0.1445 data: 0.0540 max mem: 9377 +Train: [99] [5900/6250] eta: 0:00:52 lr: 0.000000 grad: 0.1980 (0.2463) loss: 0.6351 (0.6623) time: 0.1694 data: 0.0885 max mem: 9377 +Train: [99] [6000/6250] eta: 0:00:37 lr: 0.000000 grad: 0.2002 (0.2464) loss: 0.6531 (0.6621) time: 0.1486 data: 0.0521 max mem: 9377 +Train: [99] [6100/6250] eta: 0:00:22 lr: 0.000000 grad: 0.1979 (0.2468) loss: 0.6477 (0.6619) time: 0.1481 data: 0.0552 max mem: 9377 +Train: [99] [6200/6250] eta: 0:00:07 lr: 0.000000 grad: 0.1954 (0.2467) loss: 0.6461 (0.6617) time: 0.1512 data: 0.0463 max mem: 9377 +Train: [99] [6249/6250] eta: 0:00:00 lr: 0.000000 grad: 0.2043 (0.2468) loss: 0.6473 (0.6616) time: 0.1567 data: 0.0680 max mem: 9377 +Train: [99] Total time: 0:15:41 (0.1506 s / it) +Averaged stats: lr: 0.000000 grad: 0.2043 (0.2468) loss: 0.6473 (0.6616) +Eval (hcp-train-subset): [99] [ 0/62] eta: 0:04:27 loss: 0.9023 (0.9023) time: 4.3198 data: 4.2634 max mem: 9377 +Eval (hcp-train-subset): [99] [61/62] eta: 0:00:00 loss: 0.9111 (0.9131) time: 0.1141 data: 0.0891 max mem: 9377 +Eval (hcp-train-subset): [99] Total time: 0:00:13 (0.2256 s / it) +Averaged stats (hcp-train-subset): loss: 0.9111 (0.9131) +Making plots (hcp-train-subset): example=4 +Eval (hcp-val): [99] [ 0/62] eta: 0:04:12 loss: 0.9169 (0.9169) time: 4.0801 data: 4.0176 max mem: 9377 +Eval (hcp-val): [99] [61/62] eta: 0:00:00 loss: 0.9098 (0.9125) time: 0.1378 data: 0.1126 max mem: 9377 +Eval (hcp-val): [99] Total time: 0:00:14 (0.2298 s / it) +Averaged stats (hcp-val): loss: 0.9098 (0.9125) +Making plots (hcp-val): example=41 +Eval (nsd-val): [99] [ 0/62] eta: 0:05:49 loss: 0.9237 (0.9237) time: 5.6391 data: 5.6088 max mem: 9377 +Eval (nsd-val): [99] [61/62] eta: 0:00:00 loss: 0.9157 (0.9198) time: 0.1170 data: 0.0903 max mem: 9377 +Eval (nsd-val): [99] Total time: 0:00:14 (0.2284 s / it) +Averaged stats (nsd-val): loss: 0.9157 (0.9198) +Making plots (nsd-val): example=43 +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n100_2/pretrain/checkpoint-00099.pth +done! training time: 1 day, 5:33:03 diff --git a/data_scaling/n1600_1/eval_v2/aabc_age__patch__logistic/config.yaml b/data_scaling/n1600_1/eval_v2/aabc_age__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2c88ff6bbfd3d3039c9aaa3f3d62261625537031 --- /dev/null +++ b/data_scaling/n1600_1/eval_v2/aabc_age__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n1600_1; eval v2 (aabc_age patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n1600_1/eval_v2/aabc_age__patch__logistic +model: flat_mae +representation: patch +dataset: aabc_age +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/aabc_age__patch__logistic +remote_dir: null diff --git a/data_scaling/n1600_1/eval_v2/aabc_age__patch__logistic/eval_table.csv b/data_scaling/n1600_1/eval_v2/aabc_age__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..31728a22ffb25ef035992c7dd927689a9e523377 --- /dev/null +++ b/data_scaling/n1600_1/eval_v2/aabc_age__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,aabc_age,,9.999999999999999e-05,train,0.49015748031496065,0.021037678511847124,0.46513721069570185,0.02146056717209489,0.4872576449609566,0.020889621931423925 +flat_mae,patch,logistic,aabc_age,,9.999999999999999e-05,test,0.28846153846153844,0.057369610329416676,0.2520335985853227,0.05067700869686767,0.2774725274725275,0.055732576800590475 +flat_mae,patch,logistic,aabc_age,1,0.046415888336127774,train,0.8562992125984252,0.015897847904138982,0.8564523836955984,0.016010465482733185,0.8572796654705027,0.01585374283343322 +flat_mae,patch,logistic,aabc_age,1,0.046415888336127774,test,0.46153846153846156,0.06124413992012797,0.448951048951049,0.061843570504675774,0.45947802197802196,0.06077154372766225 +flat_mae,patch,logistic,aabc_age,2,0.000774263682681127,train,0.5511811023622047,0.020932673607393024,0.5424317465398146,0.021659073418236804,0.550583153844826,0.020929558538618893 +flat_mae,patch,logistic,aabc_age,2,0.000774263682681127,test,0.5384615384615384,0.06527588244062554,0.5339433551198256,0.06807932188148856,0.5338827838827839,0.06522589118218011 +flat_mae,patch,logistic,aabc_age,3,0.005994842503189409,train,0.6732283464566929,0.020153095303066988,0.6729087449066967,0.020219208140012927,0.6727990041984008,0.020227420316866272 +flat_mae,patch,logistic,aabc_age,3,0.005994842503189409,test,0.40384615384615385,0.05889930982808587,0.38554376657824935,0.05951691557202573,0.4015567765567766,0.05885096166091861 +flat_mae,patch,logistic,aabc_age,4,0.046415888336127774,train,0.8582677165354331,0.01598557992621885,0.8578711800270983,0.016109103170358706,0.8585931041890028,0.015934085121817328 +flat_mae,patch,logistic,aabc_age,4,0.046415888336127774,test,0.5192307692307693,0.06714040750194049,0.5225378382237453,0.06678976575968712,0.5203754578754579,0.06730941528500405 +flat_mae,patch,logistic,aabc_age,5,0.046415888336127774,train,0.844488188976378,0.01589457944631792,0.8448703476774505,0.015852662775783215,0.8452328779471757,0.015805846545729897 +flat_mae,patch,logistic,aabc_age,5,0.046415888336127774,test,0.4230769230769231,0.06080421339729596,0.41138716356107663,0.06071568495113502,0.4207875457875458,0.06058450169798242 +flat_mae,patch,logistic,aabc_age,6,0.005994842503189409,train,0.6732283464566929,0.02095331071889261,0.6716360327159184,0.021269732803913033,0.6736692757231164,0.020978399731187206 +flat_mae,patch,logistic,aabc_age,6,0.005994842503189409,test,0.5769230769230769,0.06648245753402407,0.568101438791094,0.06885407094397733,0.5766941391941393,0.0665927979962554 +flat_mae,patch,logistic,aabc_age,7,0.046415888336127774,train,0.8543307086614174,0.015258401624601464,0.8548214601491473,0.015253652622346167,0.8551459418975084,0.015265850268148941 +flat_mae,patch,logistic,aabc_age,7,0.046415888336127774,test,0.4807692307692308,0.05875211188448951,0.4564244663382595,0.05584177111013447,0.4741300366300366,0.05765113856835151 +flat_mae,patch,logistic,aabc_age,8,0.046415888336127774,train,0.8523622047244095,0.01642804036582029,0.8514162400458645,0.016645776079599925,0.8515244855568491,0.016514268404154184 +flat_mae,patch,logistic,aabc_age,8,0.046415888336127774,test,0.5,0.06639842979536445,0.5112091356918943,0.06392374046333094,0.5057234432234432,0.06692120605464606 +flat_mae,patch,logistic,aabc_age,9,9.999999999999999e-05,train,0.4862204724409449,0.020306518724523052,0.45455063052889144,0.02067893635549608,0.4834539326683149,0.020163409102794745 +flat_mae,patch,logistic,aabc_age,9,9.999999999999999e-05,test,0.4807692307692308,0.06003347192591221,0.452821521570042,0.05688643140095756,0.4713827838827839,0.059065740735691784 +flat_mae,patch,logistic,aabc_age,10,0.005994842503189409,train,0.6811023622047244,0.019900800237269353,0.6796597693606397,0.020258621741022723,0.6809634936678756,0.01988782172207248 +flat_mae,patch,logistic,aabc_age,10,0.005994842503189409,test,0.46153846153846156,0.06778894980729774,0.46042572463768117,0.06690655397016707,0.45673076923076916,0.06751833962764149 +flat_mae,patch,logistic,aabc_age,11,0.046415888336127774,train,0.8464566929133859,0.016126773844934696,0.84595902754991,0.016334849376332984,0.8465139511957482,0.016161496974792373 +flat_mae,patch,logistic,aabc_age,11,0.046415888336127774,test,0.5576923076923077,0.06703456672564292,0.5635683760683761,0.06535473227557072,0.5620421245421245,0.06722734217956061 +flat_mae,patch,logistic,aabc_age,12,0.005994842503189409,train,0.6830708661417323,0.020017232872683064,0.6824325928740189,0.020211505866201306,0.6836323263436705,0.019997159081589232 +flat_mae,patch,logistic,aabc_age,12,0.005994842503189409,test,0.38461538461538464,0.06368713979405835,0.38389850889850885,0.06282552693782563,0.3825549450549451,0.0635861909928168 +flat_mae,patch,logistic,aabc_age,13,0.046415888336127774,train,0.8484251968503937,0.016120969938913227,0.8478379894531733,0.01623216762878139,0.8488476214496279,0.016122592483797344 +flat_mae,patch,logistic,aabc_age,13,0.046415888336127774,test,0.36538461538461536,0.06604557329207833,0.36962962962962964,0.06448047117487775,0.3676739926739927,0.06647401416183259 +flat_mae,patch,logistic,aabc_age,14,0.046415888336127774,train,0.8523622047244095,0.014976346903765732,0.8527718534536255,0.015006595628111772,0.8530298395248077,0.01498339885833821 +flat_mae,patch,logistic,aabc_age,14,0.046415888336127774,test,0.5769230769230769,0.06223851810651242,0.5698683110367893,0.06564828571412668,0.5798992673992674,0.06226882078726812 +flat_mae,patch,logistic,aabc_age,15,0.3593813663804626,train,0.9921259842519685,0.004120480791937253,0.9922114011958706,0.004074448077131415,0.9925881875145046,0.0038808052200207535 +flat_mae,patch,logistic,aabc_age,15,0.3593813663804626,test,0.4807692307692308,0.06720303951987633,0.4771033372891718,0.06803691766060141,0.47756410256410253,0.06713875408618737 +flat_mae,patch,logistic,aabc_age,16,0.005994842503189409,train,0.6751968503937008,0.020877346568163285,0.6747084842060703,0.021130395194145477,0.6749151065711014,0.020860679563761537 +flat_mae,patch,logistic,aabc_age,16,0.005994842503189409,test,0.4230769230769231,0.0654987691288657,0.42993265993265994,0.0660281608726854,0.42719780219780223,0.06586762566675118 +flat_mae,patch,logistic,aabc_age,17,9.999999999999999e-05,train,0.468503937007874,0.019833106128496297,0.4363070590634619,0.020079232014860657,0.46586150168108653,0.019557520426111367 +flat_mae,patch,logistic,aabc_age,17,9.999999999999999e-05,test,0.4807692307692308,0.05677958786071924,0.43205128205128207,0.05789846354034606,0.47275641025641024,0.05565036944950224 +flat_mae,patch,logistic,aabc_age,18,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,18,2.782559402207126,test,0.38461538461538464,0.06673203279672689,0.3857600732600732,0.06616524572809852,0.38850732600732596,0.06714848960375125 +flat_mae,patch,logistic,aabc_age,19,0.046415888336127774,train,0.8543307086614174,0.015607015783226577,0.8543206217496859,0.015746675765515177,0.8545108594542654,0.015580944256853746 +flat_mae,patch,logistic,aabc_age,19,0.046415888336127774,test,0.46153846153846156,0.06830599226921205,0.46480978260869565,0.06961204803772432,0.45993589743589747,0.06835062336760533 +flat_mae,patch,logistic,aabc_age,20,0.046415888336127774,train,0.844488188976378,0.015815102741839837,0.8441858724823972,0.01586495951498585,0.8445977955039328,0.015812706613251046 +flat_mae,patch,logistic,aabc_age,20,0.046415888336127774,test,0.5,0.05934149241376973,0.47508241758241754,0.06428332658098869,0.49793956043956045,0.059334255682819685 +flat_mae,patch,logistic,aabc_age,21,0.046415888336127774,train,0.844488188976378,0.016435296718890016,0.8436680375382586,0.016610743360975014,0.8440126997309111,0.01651183071871715 +flat_mae,patch,logistic,aabc_age,21,0.046415888336127774,test,0.40384615384615385,0.0632509208951367,0.3926406926406927,0.06281122948083084,0.40453296703296704,0.06361587115398405 +flat_mae,patch,logistic,aabc_age,22,0.3593813663804626,train,0.9921259842519685,0.004196080279758642,0.9923606825619449,0.004077474198340921,0.9925881875145045,0.00394380233081606 +flat_mae,patch,logistic,aabc_age,22,0.3593813663804626,test,0.3076923076923077,0.06228888605075377,0.32311499479018657,0.06229511396758932,0.30860805860805857,0.06235288844389938 +flat_mae,patch,logistic,aabc_age,23,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,23,2.782559402207126,test,0.4230769230769231,0.06004884599495185,0.4237169312169312,0.05817857687124815,0.42536630036630035,0.06060096521895294 +flat_mae,patch,logistic,aabc_age,24,0.005994842503189409,train,0.7086614173228346,0.02090113096735561,0.7072792685655898,0.02103011878592122,0.7088541376975732,0.020822031767064497 +flat_mae,patch,logistic,aabc_age,24,0.005994842503189409,test,0.4230769230769231,0.06625523313037064,0.4267741935483871,0.06579289452816635,0.4212454212454212,0.06619021690247019 +flat_mae,patch,logistic,aabc_age,25,0.005994842503189409,train,0.6909448818897638,0.020390840935890497,0.6903160171281428,0.0205142355276775,0.6914616533912299,0.020363274740752762 +flat_mae,patch,logistic,aabc_age,25,0.005994842503189409,test,0.36538461538461536,0.06271088681755177,0.35775162337662336,0.06315124341121144,0.3617216117216117,0.06231651483009279 +flat_mae,patch,logistic,aabc_age,26,0.000774263682681127,train,0.547244094488189,0.021746507313156305,0.5394685729735104,0.022060963822062577,0.546115760017952,0.021682527598067252 +flat_mae,patch,logistic,aabc_age,26,0.000774263682681127,test,0.40384615384615385,0.06289696420741527,0.3809693581662436,0.05966986775048064,0.3985805860805861,0.062176427896008135 +flat_mae,patch,logistic,aabc_age,27,0.046415888336127774,train,0.8622047244094488,0.01599959069707112,0.8621995877016455,0.015997847596672075,0.8627253355939616,0.015933798465263323 +flat_mae,patch,logistic,aabc_age,27,0.046415888336127774,test,0.38461538461538464,0.060313517673911084,0.357683976347989,0.05746858091206502,0.37934981684981683,0.05939219525579258 +flat_mae,patch,logistic,aabc_age,28,0.3593813663804626,train,0.9940944881889764,0.003357995391936395,0.9941394541162243,0.0033378471294543597,0.9943867486655837,0.0031998479680334295 +flat_mae,patch,logistic,aabc_age,28,0.3593813663804626,test,0.40384615384615385,0.06474688998950923,0.390485312899106,0.0640825388684165,0.4001831501831502,0.06434523346695864 +flat_mae,patch,logistic,aabc_age,29,9.999999999999999e-05,train,0.4862204724409449,0.019612869278340755,0.4574014262480931,0.019850250324620036,0.4830687835761783,0.019537146797369172 +flat_mae,patch,logistic,aabc_age,29,9.999999999999999e-05,test,0.36538461538461536,0.06434811942374681,0.34853535353535353,0.060751513516429295,0.3601190476190476,0.0634981557801382 +flat_mae,patch,logistic,aabc_age,30,0.046415888336127774,train,0.8366141732283464,0.015515645135491087,0.8364967235545592,0.01572845393460872,0.8361657514830577,0.015605294892886633 +flat_mae,patch,logistic,aabc_age,30,0.046415888336127774,test,0.5384615384615384,0.06651123275688277,0.5371799337316578,0.06594600536879391,0.5409798534798534,0.06667929524806138 +flat_mae,patch,logistic,aabc_age,31,9.999999999999999e-05,train,0.47834645669291337,0.02092645569209214,0.4476684648884335,0.02101836349702952,0.47538941653928257,0.020748517158279644 +flat_mae,patch,logistic,aabc_age,31,9.999999999999999e-05,test,0.4807692307692308,0.05878366465923362,0.42183908045977014,0.04909800992024476,0.47115384615384615,0.05730733013276767 +flat_mae,patch,logistic,aabc_age,32,0.005994842503189409,train,0.687007874015748,0.02126103634734988,0.6854968015450936,0.021591818342563865,0.6872794353160501,0.02120443111771496 +flat_mae,patch,logistic,aabc_age,32,0.005994842503189409,test,0.4423076923076923,0.06200801627886702,0.4472723955482576,0.06214717612657631,0.4448260073260073,0.06229503999114824 +flat_mae,patch,logistic,aabc_age,33,0.046415888336127774,train,0.8622047244094488,0.015187298107564999,0.8620554785633645,0.015337125086181666,0.8617051040585819,0.01519935247411302 +flat_mae,patch,logistic,aabc_age,33,0.046415888336127774,test,0.4807692307692308,0.06826479942281151,0.4767130857648099,0.0689634948773413,0.4819139194139194,0.06868282550850108 +flat_mae,patch,logistic,aabc_age,34,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,34,2.782559402207126,test,0.3269230769230769,0.05915584672019692,0.32964132641551996,0.05850913892679913,0.3351648351648352,0.0601171555418695 +flat_mae,patch,logistic,aabc_age,35,0.005994842503189409,train,0.6889763779527559,0.02031754546211985,0.6886902884440709,0.020488746264427333,0.6891279831373505,0.020355468625476533 +flat_mae,patch,logistic,aabc_age,35,0.005994842503189409,test,0.40384615384615385,0.06678999809966944,0.40172875370243794,0.0682856104774609,0.4017857142857143,0.06660701651241716 +flat_mae,patch,logistic,aabc_age,36,0.005994842503189409,train,0.6811023622047244,0.021631378777902607,0.6804639055547823,0.02170935709399752,0.6805783445757391,0.021653075437656332 +flat_mae,patch,logistic,aabc_age,36,0.005994842503189409,test,0.5,0.06534671978747095,0.49983852701244,0.06476011572251777,0.5041208791208791,0.06543085447767505 +flat_mae,patch,logistic,aabc_age,37,0.3593813663804626,train,0.9921259842519685,0.0038598964102162826,0.9918355105305252,0.004004579975611368,0.9918355105305252,0.004004410340791702 +flat_mae,patch,logistic,aabc_age,37,0.3593813663804626,test,0.3269230769230769,0.061634768390545706,0.32720797720797723,0.06046770944022488,0.3232600732600733,0.061244413286401636 +flat_mae,patch,logistic,aabc_age,38,0.046415888336127774,train,0.8523622047244095,0.015266899772267374,0.8519992782408836,0.015383870957006851,0.851742053438028,0.015365058246599526 +flat_mae,patch,logistic,aabc_age,38,0.046415888336127774,test,0.4807692307692308,0.07116527974665819,0.4884408602150538,0.07016692131037368,0.48054029304029305,0.07102644613450065 +flat_mae,patch,logistic,aabc_age,39,0.046415888336127774,train,0.84251968503937,0.0165870860841454,0.8419818332408006,0.016759804510919157,0.8424317064610108,0.0166001439785985 +flat_mae,patch,logistic,aabc_age,39,0.046415888336127774,test,0.46153846153846156,0.06473499689105737,0.4541257974761811,0.06656256458029676,0.4553571428571428,0.06493133104114582 +flat_mae,patch,logistic,aabc_age,40,9.999999999999999e-05,train,0.468503937007874,0.019868534923777343,0.4307040688295185,0.019157289472807773,0.4646736889347493,0.01958081289254235 +flat_mae,patch,logistic,aabc_age,40,9.999999999999999e-05,test,0.5,0.05463266248912721,0.44041015910086123,0.04643416251031891,0.49038461538461536,0.05314720017826937 +flat_mae,patch,logistic,aabc_age,41,0.046415888336127774,train,0.8405511811023622,0.015220150633484909,0.8402164841775028,0.015258386828784368,0.8409506865315532,0.015181018063098952 +flat_mae,patch,logistic,aabc_age,41,0.046415888336127774,test,0.4230769230769231,0.0620567000707308,0.3989819004524887,0.06418310860113063,0.42078754578754585,0.06162798762711347 +flat_mae,patch,logistic,aabc_age,42,0.005994842503189409,train,0.7007874015748031,0.019351977250349556,0.6988618056028544,0.01958167427629385,0.7003544858061831,0.019372339975703534 +flat_mae,patch,logistic,aabc_age,42,0.005994842503189409,test,0.38461538461538464,0.05821225953150516,0.3626385336743393,0.06066598807589091,0.3882783882783883,0.058921379819088744 +flat_mae,patch,logistic,aabc_age,43,9.999999999999999e-05,train,0.4862204724409449,0.01941771238058459,0.45334181348221037,0.01940362088541176,0.48318637811691467,0.01931463830077095 +flat_mae,patch,logistic,aabc_age,43,9.999999999999999e-05,test,0.5,0.06300487207609776,0.47578828828828823,0.06634836354998894,0.4908424908424909,0.06259096113351144 +flat_mae,patch,logistic,aabc_age,44,0.046415888336127774,train,0.8464566929133859,0.015343635988719597,0.8463894533445552,0.015416406798484158,0.8466139245361909,0.015377022167139057 +flat_mae,patch,logistic,aabc_age,44,0.046415888336127774,test,0.5384615384615384,0.07017939818062052,0.5350403045230632,0.07220238250133405,0.5368589743589743,0.07032051949127911 +flat_mae,patch,logistic,aabc_age,45,0.005994842503189409,train,0.6811023622047244,0.020316917976265784,0.6792122143719687,0.020581758532183723,0.6806459524462541,0.020309562900946893 +flat_mae,patch,logistic,aabc_age,45,0.005994842503189409,test,0.5192307692307693,0.05997902739967238,0.5059970014992504,0.0622535995006364,0.5247252747252747,0.06052408581968997 +flat_mae,patch,logistic,aabc_age,46,0.046415888336127774,train,0.844488188976378,0.015792301947265386,0.8440664781921149,0.01588607005973136,0.8447153900446691,0.01574850274169458 +flat_mae,patch,logistic,aabc_age,46,0.046415888336127774,test,0.4423076923076923,0.07019398312015204,0.4409237688847884,0.07046973153751124,0.4448260073260073,0.0702904414080819 +flat_mae,patch,logistic,aabc_age,47,0.005994842503189409,train,0.6968503937007874,0.01898557460031393,0.6957381407420054,0.019058137074697584,0.6971924992663826,0.018951717003623747 +flat_mae,patch,logistic,aabc_age,47,0.005994842503189409,test,0.4423076923076923,0.06470793509226921,0.43091695944956815,0.06765968064269712,0.4432234432234432,0.06455046047334474 +flat_mae,patch,logistic,aabc_age,48,0.046415888336127774,train,0.8543307086614174,0.015215693002115524,0.8544681449219356,0.015289392614034027,0.8540757236919077,0.015300820728704492 +flat_mae,patch,logistic,aabc_age,48,0.046415888336127774,test,0.4807692307692308,0.06524986113930224,0.4703703703703703,0.06790174350279524,0.48031135531135527,0.0653147660140734 +flat_mae,patch,logistic,aabc_age,49,0.046415888336127774,train,0.8385826771653543,0.016298704298057683,0.8378999117582662,0.016510041403023007,0.8388845708290739,0.01632964841947291 +flat_mae,patch,logistic,aabc_age,49,0.046415888336127774,test,0.3269230769230769,0.059739808122860706,0.32719576719576715,0.05855071998933395,0.3289835164835165,0.06030015490426219 +flat_mae,patch,logistic,aabc_age,50,0.046415888336127774,train,0.8582677165354331,0.015790911601480724,0.8582846475624952,0.015777350735578857,0.8584755096482665,0.015727934365089118 +flat_mae,patch,logistic,aabc_age,50,0.046415888336127774,test,0.36538461538461536,0.0634354725023597,0.3593906093906094,0.06382634032985776,0.36767399267399264,0.06406081613463532 +flat_mae,patch,logistic,aabc_age,51,0.046415888336127774,train,0.8464566929133859,0.015730440470422793,0.8466397769765601,0.015727007002276746,0.8476341560715703,0.015590230258999712 +flat_mae,patch,logistic,aabc_age,51,0.046415888336127774,test,0.4423076923076923,0.06367930004670175,0.42927489177489175,0.058603791013765505,0.4356684981684981,0.06261843462506654 +flat_mae,patch,logistic,aabc_age,52,0.005994842503189409,train,0.7066929133858267,0.019803014980522657,0.7062371759630608,0.01996879792082115,0.7065028462434,0.019777673217068015 +flat_mae,patch,logistic,aabc_age,52,0.005994842503189409,test,0.4423076923076923,0.06834999556321218,0.4397433899537686,0.06905768376326123,0.44345238095238093,0.0686042909083016 +flat_mae,patch,logistic,aabc_age,53,0.046415888336127774,train,0.8405511811023622,0.014749893207026951,0.8403155803735102,0.014901020062037505,0.8409506865315531,0.014765368216302612 +flat_mae,patch,logistic,aabc_age,53,0.046415888336127774,test,0.5769230769230769,0.07109535017543726,0.5811100131752305,0.0710510999704213,0.57257326007326,0.07141983363832882 +flat_mae,patch,logistic,aabc_age,54,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,54,2.782559402207126,test,0.4230769230769231,0.06643334293300644,0.42666666666666664,0.0656952207959308,0.42559523809523814,0.06688456766964754 +flat_mae,patch,logistic,aabc_age,55,0.005994842503189409,train,0.6751968503937008,0.021005186334229597,0.6734481590463888,0.021349719193835062,0.6745475786792586,0.02099350777949181 +flat_mae,patch,logistic,aabc_age,55,0.005994842503189409,test,0.5576923076923077,0.06022762542144033,0.5466829466829467,0.0634633073882868,0.5588369963369964,0.0605641729647672 +flat_mae,patch,logistic,aabc_age,56,0.005994842503189409,train,0.6850393700787402,0.020409683868457908,0.6844517271599623,0.020537578759683837,0.6851457117430556,0.02045431831356836 +flat_mae,patch,logistic,aabc_age,56,0.005994842503189409,test,0.38461538461538464,0.06943639335315084,0.384995894909688,0.0691004815798914,0.38118131868131866,0.06926009017946293 +flat_mae,patch,logistic,aabc_age,57,9.999999999999999e-05,train,0.4921259842519685,0.020345281574875404,0.46025781406792576,0.02064779256029748,0.4884144803591944,0.020184505473505972 +flat_mae,patch,logistic,aabc_age,57,9.999999999999999e-05,test,0.46153846153846156,0.05997060423891913,0.4009852216748768,0.04921538398003282,0.45329670329670335,0.058404267370179876 +flat_mae,patch,logistic,aabc_age,58,0.005994842503189409,train,0.6830708661417323,0.02082508924035341,0.6811789869482949,0.02117240878963416,0.6824445135973333,0.0208193943784184 +flat_mae,patch,logistic,aabc_age,58,0.005994842503189409,test,0.46153846153846156,0.06065573627609565,0.4435483870967742,0.06552940726551046,0.459478021978022,0.06033877770808076 +flat_mae,patch,logistic,aabc_age,59,0.005994842503189409,train,0.6948818897637795,0.02044023663284129,0.6920224528252903,0.020674354461655588,0.6947088690018394,0.020411691537248932 +flat_mae,patch,logistic,aabc_age,59,0.005994842503189409,test,0.40384615384615385,0.06843231874283313,0.3974823485693051,0.06820876584531288,0.4017857142857143,0.06829254337332072 +flat_mae,patch,logistic,aabc_age,60,0.046415888336127774,train,0.8543307086614174,0.0148814788719679,0.8537027533909327,0.015014139237668457,0.8544608727840441,0.014911260856818227 +flat_mae,patch,logistic,aabc_age,60,0.046415888336127774,test,0.40384615384615385,0.06455504279524457,0.40275132275132275,0.06452840428091523,0.4107142857142857,0.06551144468950743 +flat_mae,patch,logistic,aabc_age,61,0.046415888336127774,train,0.8503937007874016,0.01608371511410794,0.8499223155004274,0.016279501617348518,0.850646182600707,0.016066230238838044 +flat_mae,patch,logistic,aabc_age,61,0.046415888336127774,test,0.46153846153846156,0.06798933957509799,0.46467032967032973,0.06752144155407422,0.4626831501831502,0.06822601449907856 +flat_mae,patch,logistic,aabc_age,62,0.005994842503189409,train,0.6732283464566929,0.02052909295091516,0.6724999863344293,0.02065345200193851,0.6735516811823801,0.020534575708194582 +flat_mae,patch,logistic,aabc_age,62,0.005994842503189409,test,0.5384615384615384,0.06799402819235292,0.5325091575091575,0.06773784925236269,0.5352564102564102,0.06773973188322224 +flat_mae,patch,logistic,aabc_age,63,0.005994842503189409,train,0.6909448818897638,0.019194442095388297,0.690456490764106,0.019415003957145446,0.6909265442884295,0.019248399432475624 +flat_mae,patch,logistic,aabc_age,63,0.005994842503189409,test,0.46153846153846156,0.05919603127189521,0.4394345238095238,0.055605930916837076,0.4548992673992674,0.05834491941483136 +flat_mae,patch,logistic,aabc_age,64,0.005994842503189409,train,0.7007874015748031,0.01925292455317393,0.698925698505483,0.019550192785915917,0.700857229439056,0.019238687381887117 +flat_mae,patch,logistic,aabc_age,64,0.005994842503189409,test,0.38461538461538464,0.06205127675354926,0.3751086130118388,0.06034537229970619,0.3782051282051282,0.061171968033065176 +flat_mae,patch,logistic,aabc_age,65,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,65,166.81005372000556,test,0.4230769230769231,0.06492640348750642,0.41180213464696225,0.06617747732300965,0.4210164835164835,0.06504778586298834 +flat_mae,patch,logistic,aabc_age,66,0.005994842503189409,train,0.6751968503937008,0.019823245153659796,0.673252240814584,0.019958875826495234,0.675517823544417,0.019768948452260633 +flat_mae,patch,logistic,aabc_age,66,0.005994842503189409,test,0.4230769230769231,0.06741270928230866,0.43229166666666674,0.06727913700493544,0.42422161172161177,0.0675368110694786 +flat_mae,patch,logistic,aabc_age,67,0.005994842503189409,train,0.6850393700787402,0.02021982967739897,0.6845639635093184,0.020412939726301,0.6841754668778974,0.02026797305582273 +flat_mae,patch,logistic,aabc_age,67,0.005994842503189409,test,0.5384615384615384,0.06299125279746452,0.5184920634920634,0.07052700780165627,0.5393772893772893,0.06333800098844283 +flat_mae,patch,logistic,aabc_age,68,0.046415888336127774,train,0.8385826771653543,0.016726712829060124,0.8381949653056032,0.01694032253519187,0.8381818805153158,0.016804781406782696 +flat_mae,patch,logistic,aabc_age,68,0.046415888336127774,test,0.46153846153846156,0.06104701602601451,0.4492655620241827,0.06275866104478219,0.4608516483516484,0.060818628693039875 +flat_mae,patch,logistic,aabc_age,69,0.000774263682681127,train,0.5452755905511811,0.02011961987956553,0.5309647499761568,0.020700374421914417,0.5444024279376818,0.02002139564525435 +flat_mae,patch,logistic,aabc_age,69,0.000774263682681127,test,0.38461538461538464,0.05990075618651218,0.34044027093596063,0.0512899064682967,0.37774725274725274,0.05835923222948263 +flat_mae,patch,logistic,aabc_age,70,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,70,2.782559402207126,test,0.4230769230769231,0.06703619970587472,0.4212454212454212,0.06733962857330324,0.42261904761904756,0.06717958962717031 +flat_mae,patch,logistic,aabc_age,71,0.005994842503189409,train,0.6850393700787402,0.02081531322258997,0.6837165799799252,0.02087174570608719,0.6852633062837921,0.020844789184039047 +flat_mae,patch,logistic,aabc_age,71,0.005994842503189409,test,0.4807692307692308,0.06719541184322829,0.4781537802527308,0.06887993326505827,0.4819139194139194,0.06758399755069669 +flat_mae,patch,logistic,aabc_age,72,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,72,21.54434690031882,test,0.40384615384615385,0.06504230530423981,0.39736024844720497,0.06504647222487471,0.4004120879120879,0.06467064249926671 +flat_mae,patch,logistic,aabc_age,73,0.005994842503189409,train,0.6791338582677166,0.020742857831849062,0.6793970988567587,0.02086978239480343,0.6788797567651025,0.020803725857832963 +flat_mae,patch,logistic,aabc_age,73,0.005994842503189409,test,0.5384615384615384,0.06548701257935077,0.5380952380952381,0.06616687403765777,0.5368589743589743,0.06550393507828965 +flat_mae,patch,logistic,aabc_age,74,0.005994842503189409,train,0.6968503937007874,0.019801856534856627,0.6942375918429046,0.02008775368793117,0.6961722677310032,0.01981766707978473 +flat_mae,patch,logistic,aabc_age,74,0.005994842503189409,test,0.46153846153846156,0.062137278827756395,0.4556451612903225,0.06389322609951008,0.4610805860805861,0.06220597971904111 +flat_mae,patch,logistic,aabc_age,75,0.005994842503189409,train,0.6889763779527559,0.0195280924489001,0.6875869033172404,0.019717833429424826,0.6885928740345499,0.019542797693221186 +flat_mae,patch,logistic,aabc_age,75,0.005994842503189409,test,0.5384615384615384,0.06725488037987461,0.5298534798534799,0.06919179578193468,0.5352564102564102,0.06708739798542818 +flat_mae,patch,logistic,aabc_age,76,0.005994842503189409,train,0.6968503937007874,0.019334076073425254,0.6963258374965791,0.01953096481884297,0.6979951629205833,0.01934192173886189 +flat_mae,patch,logistic,aabc_age,76,0.005994842503189409,test,0.5,0.06637935624056053,0.49935861562306405,0.06807193242922643,0.4981684981684982,0.06654080635386729 +flat_mae,patch,logistic,aabc_age,77,0.046415888336127774,train,0.844488188976378,0.016130863923223156,0.8435251895597595,0.01637904549419637,0.8441802809418687,0.016137279747212442 +flat_mae,patch,logistic,aabc_age,77,0.046415888336127774,test,0.5,0.0691320235953681,0.5014688759516346,0.06988077873142089,0.49977106227106227,0.06937151922362128 +flat_mae,patch,logistic,aabc_age,78,0.046415888336127774,train,0.8503937007874016,0.015340633401853614,0.8492545365446951,0.015498925322833687,0.8498435189465063,0.015305063236706551 +flat_mae,patch,logistic,aabc_age,78,0.046415888336127774,test,0.5,0.07239899105184715,0.5039010989010989,0.07217295931440829,0.5027472527472527,0.07270306661751606 +flat_mae,patch,logistic,aabc_age,79,0.046415888336127774,train,0.8503937007874016,0.015472843656477046,0.8508077513818949,0.015445067113803883,0.851766387476529,0.015348116286398884 +flat_mae,patch,logistic,aabc_age,79,0.046415888336127774,test,0.38461538461538464,0.062416287132512814,0.3760262725779967,0.0638259470453883,0.3825549450549451,0.062347247343594175 +flat_mae,patch,logistic,aabc_age,80,0.005994842503189409,train,0.6712598425196851,0.021808189360748093,0.6701400210016727,0.02221177086932435,0.6725734048857954,0.021808745096314388 +flat_mae,patch,logistic,aabc_age,80,0.005994842503189409,test,0.46153846153846156,0.0642487192790182,0.43763736263736264,0.06490923467663018,0.4592490842490842,0.06400773653712143 +flat_mae,patch,logistic,aabc_age,81,0.046415888336127774,train,0.8484251968503937,0.01667637904951902,0.8484208393260741,0.01675376054162948,0.84868004023867,0.01670853495706794 +flat_mae,patch,logistic,aabc_age,81,0.046415888336127774,test,0.5,0.06556987329577257,0.5017717789456919,0.06624935497303235,0.5027472527472527,0.06589529603938821 +flat_mae,patch,logistic,aabc_age,82,0.046415888336127774,train,0.8543307086614174,0.015163410231805376,0.8535321736278225,0.015319894535313993,0.8541433315624227,0.015154810839918001 +flat_mae,patch,logistic,aabc_age,82,0.046415888336127774,test,0.4807692307692308,0.0670881694876757,0.4732175925925926,0.06904602513776205,0.48031135531135527,0.0670650395332144 +flat_mae,patch,logistic,aabc_age,83,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,83,2.782559402207126,test,0.36538461538461536,0.06419804555634415,0.37195616883116883,0.06390664280665741,0.36652930402930406,0.06453481998065173 +flat_mae,patch,logistic,aabc_age,84,0.046415888336127774,train,0.8484251968503937,0.015679427339315804,0.848849875918747,0.015677321456707364,0.8487800135791128,0.015658480435040274 +flat_mae,patch,logistic,aabc_age,84,0.046415888336127774,test,0.5384615384615384,0.06249112363003548,0.5356276289780126,0.06429364092482233,0.5338827838827839,0.06252349907763052 +flat_mae,patch,logistic,aabc_age,85,0.046415888336127774,train,0.8562992125984252,0.016233491354372392,0.8563125149363312,0.01627732092190166,0.8566269618269661,0.016191621939164465 +flat_mae,patch,logistic,aabc_age,85,0.046415888336127774,test,0.4423076923076923,0.06004066669187422,0.4293912427950248,0.06037655230998462,0.44024725274725274,0.05987147390114897 +flat_mae,patch,logistic,aabc_age,86,0.046415888336127774,train,0.8385826771653543,0.015731260262455114,0.8385684128408564,0.015727804729087445,0.8387169896181161,0.015659363998697294 +flat_mae,patch,logistic,aabc_age,86,0.046415888336127774,test,0.5961538461538461,0.06318296667699697,0.5872947454844006,0.06707402349579383,0.594551282051282,0.06347956881139816 +flat_mae,patch,logistic,aabc_age,87,0.046415888336127774,train,0.8503937007874016,0.015890007645834738,0.8499580424525646,0.01602025971473414,0.8506961692709283,0.015913279711858734 +flat_mae,patch,logistic,aabc_age,87,0.046415888336127774,test,0.5,0.0685825529420372,0.4977193942711184,0.06966483313801954,0.5011446886446886,0.0687655165752896 +flat_mae,patch,logistic,aabc_age,88,9.999999999999999e-05,train,0.4822834645669291,0.01804677171606985,0.44360224402520426,0.018566553435947545,0.478836578830777,0.017798067171176955 +flat_mae,patch,logistic,aabc_age,88,9.999999999999999e-05,test,0.36538461538461536,0.062133886265750414,0.3672577996715928,0.06443395991380554,0.3630952380952381,0.0619940751230195 +flat_mae,patch,logistic,aabc_age,89,0.005994842503189409,train,0.687007874015748,0.019277920369594515,0.6844603754218843,0.019530098437732797,0.6866943395430283,0.019186290180985176 +flat_mae,patch,logistic,aabc_age,89,0.005994842503189409,test,0.40384615384615385,0.06488743097260712,0.3979793833242109,0.06548820994966015,0.4017857142857143,0.06472372130519154 +flat_mae,patch,logistic,aabc_age,90,9.999999999999999e-05,train,0.4763779527559055,0.01957050776064709,0.44710707836965213,0.01997272839153692,0.4725530026525301,0.019466152527572465 +flat_mae,patch,logistic,aabc_age,90,9.999999999999999e-05,test,0.5,0.060633673051303,0.4821001877453491,0.0614229528228039,0.49633699633699635,0.060272415046333176 +flat_mae,patch,logistic,aabc_age,91,0.000774263682681127,train,0.562992125984252,0.02171711891035519,0.5524947478991596,0.02195739632842267,0.5611069661999006,0.021572197868785826 +flat_mae,patch,logistic,aabc_age,91,0.000774263682681127,test,0.46153846153846156,0.06624006010059304,0.4420980262131378,0.06994111915377038,0.4608516483516484,0.06621558216525715 +flat_mae,patch,logistic,aabc_age,92,0.3593813663804626,train,0.9940944881889764,0.0032925510143024853,0.994122290094622,0.0032784463833620448,0.9943367619953625,0.003166697486148637 +flat_mae,patch,logistic,aabc_age,92,0.3593813663804626,test,0.5192307692307693,0.06869849873039913,0.509158615136876,0.07182624442984537,0.5203754578754579,0.06902399008228371 +flat_mae,patch,logistic,aabc_age,93,0.046415888336127774,train,0.8523622047244095,0.016214938185156336,0.8521219560039508,0.016316268024657375,0.8517920401082493,0.016321178026465023 +flat_mae,patch,logistic,aabc_age,93,0.046415888336127774,test,0.34615384615384615,0.052826205506033466,0.3232976314872867,0.05099056347859926,0.34226190476190477,0.05200557331540366 +flat_mae,patch,logistic,aabc_age,94,9.999999999999999e-05,train,0.4763779527559055,0.019576502355601957,0.4339208152937611,0.019206173111804368,0.472638231723339,0.019362741013550667 +flat_mae,patch,logistic,aabc_age,94,9.999999999999999e-05,test,0.40384615384615385,0.05859260358839051,0.36620670995670995,0.0603907605172631,0.39720695970695974,0.057529726545082385 +flat_mae,patch,logistic,aabc_age,95,0.005994842503189409,train,0.6771653543307087,0.01988621610370313,0.6758810608908616,0.02012455969116714,0.6780690616794754,0.01979989456448332 +flat_mae,patch,logistic,aabc_age,95,0.005994842503189409,test,0.5,0.06301939215021236,0.4875899962106859,0.06286901645579311,0.4965659340659341,0.06263604936950677 +flat_mae,patch,logistic,aabc_age,96,0.046415888336127774,train,0.8503937007874016,0.015308886919826596,0.8503615616540265,0.015440399447607424,0.8506961692709282,0.01534308410435802 +flat_mae,patch,logistic,aabc_age,96,0.046415888336127774,test,0.38461538461538464,0.062227346107240476,0.3859447004608295,0.0619538480792982,0.38713369963369965,0.06253333821332652 +flat_mae,patch,logistic,aabc_age,97,0.005994842503189409,train,0.6850393700787402,0.019758690564197445,0.684256590300752,0.019827751341816597,0.6847281971809915,0.0198135470972096 +flat_mae,patch,logistic,aabc_age,97,0.005994842503189409,test,0.4230769230769231,0.06486426428369545,0.41858974358974355,0.06362208792538525,0.4223901098901099,0.06471972336270713 +flat_mae,patch,logistic,aabc_age,98,0.046415888336127774,train,0.844488188976378,0.015520979344152228,0.8448581975887655,0.015551471623894527,0.8457856082502699,0.0154451885111312 +flat_mae,patch,logistic,aabc_age,98,0.046415888336127774,test,0.46153846153846156,0.06736383372904602,0.4535035366931919,0.06810922398770386,0.4610805860805861,0.06746827878366458 +flat_mae,patch,logistic,aabc_age,99,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,99,21.54434690031882,test,0.4807692307692308,0.06696532706995313,0.47916666666666674,0.06720532961517768,0.48328754578754574,0.06733063586148494 +flat_mae,patch,logistic,aabc_age,100,0.046415888336127774,train,0.860236220472441,0.01572130803460872,0.8604067297874014,0.015740529324844096,0.8597889483667664,0.01579739667270168 +flat_mae,patch,logistic,aabc_age,100,0.046415888336127774,test,0.36538461538461536,0.06512140460154485,0.3701539855072464,0.06596356107803629,0.36652930402930406,0.0650708943151534 diff --git a/data_scaling/n1600_1/eval_v2/aabc_age__patch__logistic/log.txt b/data_scaling/n1600_1/eval_v2/aabc_age__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..475e92fa27fdc035945090ebeca93e8aeb66c2e4 --- /dev/null +++ b/data_scaling/n1600_1/eval_v2/aabc_age__patch__logistic/log.txt @@ -0,0 +1,245 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: 58906bf7243fb545e1349221e6921a1797e2e666, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-26 17:20:38 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n1600_1; eval v2 (aabc_age patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n1600_1/eval_v2/aabc_age__patch__logistic +model: flat_mae +representation: patch +dataset: aabc_age +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/aabc_age__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: aabc_age (flat) +train (n=455): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 471 +}), + labels=[0 1 2 3], + counts=[110 127 109 109] +) + +validation (n=53): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 58 +}), + labels=[0 1 2 3], + counts=[14 13 12 14] +) + +test (n=52): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 55 +}), + labels=[0 1 2 3], + counts=[13 13 12 14] +) + +extracting features for all splits +extract (train) [ 0/228] eta: 0:21:23 time: 5.6288 data: 4.5380 max mem: 3205 +extract (train) [ 20/228] eta: 0:01:46 time: 0.2558 data: 0.0857 max mem: 3393 +extract (train) [ 40/228] eta: 0:01:10 time: 0.2321 data: 0.0749 max mem: 3393 +extract (train) [ 60/228] eta: 0:00:54 time: 0.2197 data: 0.0709 max mem: 3393 +extract (train) [ 80/228] eta: 0:00:44 time: 0.2266 data: 0.0778 max mem: 3393 +extract (train) [100/228] eta: 0:00:37 time: 0.2518 data: 0.0900 max mem: 3393 +extract (train) [120/228] eta: 0:00:29 time: 0.2006 data: 0.0659 max mem: 3393 +extract (train) [140/228] eta: 0:00:23 time: 0.2376 data: 0.0837 max mem: 3393 +extract (train) [160/228] eta: 0:00:17 time: 0.2173 data: 0.0760 max mem: 3393 +extract (train) [180/228] eta: 0:00:12 time: 0.2098 data: 0.0737 max mem: 3393 +extract (train) [200/228] eta: 0:00:07 time: 0.2030 data: 0.0656 max mem: 3393 +extract (train) [220/228] eta: 0:00:01 time: 0.1831 data: 0.0588 max mem: 3393 +extract (train) [227/228] eta: 0:00:00 time: 0.1831 data: 0.0600 max mem: 3393 +extract (train) Total time: 0:00:56 (0.2462 s / it) +extract (validation) [ 0/27] eta: 0:01:54 time: 4.2247 data: 4.0818 max mem: 3393 +extract (validation) [20/27] eta: 0:00:02 time: 0.2007 data: 0.0632 max mem: 3393 +extract (validation) [26/27] eta: 0:00:00 time: 0.1752 data: 0.0520 max mem: 3393 +extract (validation) Total time: 0:00:09 (0.3542 s / it) +extract (test) [ 0/26] eta: 0:01:58 time: 4.5586 data: 4.4067 max mem: 3393 +extract (test) [20/26] eta: 0:00:02 time: 0.1852 data: 0.0483 max mem: 3393 +extract (test) [25/26] eta: 0:00:00 time: 0.1680 data: 0.0417 max mem: 3393 +extract (test) Total time: 0:00:09 (0.3595 s / it) +feature extraction time: 0:01:15 +train features: (455, 768) +validation features: (53, 768) +test features: (52, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | aabc_age | | 0.0001 | train | 0.49016 | 0.021038 | 0.46514 | 0.021461 | 0.48726 | 0.02089 | +| flat_mae | patch | logistic | aabc_age | | 0.0001 | test | 0.28846 | 0.05737 | 0.25203 | 0.050677 | 0.27747 | 0.055733 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 1, "C": 0.046415888336127774, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06124413992012797, "f1": 0.448951048951049, "f1_std": 0.061843570504675774, "bacc": 0.45947802197802196, "bacc_std": 0.06077154372766225} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 2, "C": 0.000774263682681127, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06527588244062554, "f1": 0.5339433551198256, "f1_std": 0.06807932188148856, "bacc": 0.5338827838827839, "bacc_std": 0.06522589118218011} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 3, "C": 0.005994842503189409, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.05889930982808587, "f1": 0.38554376657824935, "f1_std": 0.05951691557202573, "bacc": 0.4015567765567766, "bacc_std": 0.05885096166091861} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 4, "C": 0.046415888336127774, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06714040750194049, "f1": 0.5225378382237453, "f1_std": 0.06678976575968712, "bacc": 0.5203754578754579, "bacc_std": 0.06730941528500405} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 5, "C": 0.046415888336127774, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06080421339729596, "f1": 0.41138716356107663, "f1_std": 0.06071568495113502, "bacc": 0.4207875457875458, "bacc_std": 0.06058450169798242} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 6, "C": 0.005994842503189409, "split": "test", "acc": 0.5769230769230769, "acc_std": 0.06648245753402407, "f1": 0.568101438791094, "f1_std": 0.06885407094397733, "bacc": 0.5766941391941393, "bacc_std": 0.0665927979962554} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 7, "C": 0.046415888336127774, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.05875211188448951, "f1": 0.4564244663382595, "f1_std": 0.05584177111013447, "bacc": 0.4741300366300366, "bacc_std": 0.05765113856835151} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 8, "C": 0.046415888336127774, "split": "test", "acc": 0.5, "acc_std": 0.06639842979536445, "f1": 0.5112091356918943, "f1_std": 0.06392374046333094, "bacc": 0.5057234432234432, "bacc_std": 0.06692120605464606} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 9, "C": 9.999999999999999e-05, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06003347192591221, "f1": 0.452821521570042, "f1_std": 0.05688643140095756, "bacc": 0.4713827838827839, "bacc_std": 0.059065740735691784} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 10, "C": 0.005994842503189409, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06778894980729774, "f1": 0.46042572463768117, "f1_std": 0.06690655397016707, "bacc": 0.45673076923076916, "bacc_std": 0.06751833962764149} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 11, "C": 0.046415888336127774, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.06703456672564292, "f1": 0.5635683760683761, "f1_std": 0.06535473227557072, "bacc": 0.5620421245421245, "bacc_std": 0.06722734217956061} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 12, "C": 0.005994842503189409, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.06368713979405835, "f1": 0.38389850889850885, "f1_std": 0.06282552693782563, "bacc": 0.3825549450549451, "bacc_std": 0.0635861909928168} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 13, "C": 0.046415888336127774, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.06604557329207833, "f1": 0.36962962962962964, "f1_std": 0.06448047117487775, "bacc": 0.3676739926739927, "bacc_std": 0.06647401416183259} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 14, "C": 0.046415888336127774, "split": "test", "acc": 0.5769230769230769, "acc_std": 0.06223851810651242, "f1": 0.5698683110367893, "f1_std": 0.06564828571412668, "bacc": 0.5798992673992674, "bacc_std": 0.06226882078726812} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 15, "C": 0.3593813663804626, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06720303951987633, "f1": 0.4771033372891718, "f1_std": 0.06803691766060141, "bacc": 0.47756410256410253, "bacc_std": 0.06713875408618737} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 16, "C": 0.005994842503189409, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.0654987691288657, "f1": 0.42993265993265994, "f1_std": 0.0660281608726854, "bacc": 0.42719780219780223, "bacc_std": 0.06586762566675118} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 17, "C": 9.999999999999999e-05, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.05677958786071924, "f1": 0.43205128205128207, "f1_std": 0.05789846354034606, "bacc": 0.47275641025641024, "bacc_std": 0.05565036944950224} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 18, "C": 2.782559402207126, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.06673203279672689, "f1": 0.3857600732600732, "f1_std": 0.06616524572809852, "bacc": 0.38850732600732596, "bacc_std": 0.06714848960375125} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 19, "C": 0.046415888336127774, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06830599226921205, "f1": 0.46480978260869565, "f1_std": 0.06961204803772432, "bacc": 0.45993589743589747, "bacc_std": 0.06835062336760533} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 20, "C": 0.046415888336127774, "split": "test", "acc": 0.5, "acc_std": 0.05934149241376973, "f1": 0.47508241758241754, "f1_std": 0.06428332658098869, "bacc": 0.49793956043956045, "bacc_std": 0.059334255682819685} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 21, "C": 0.046415888336127774, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.0632509208951367, "f1": 0.3926406926406927, "f1_std": 0.06281122948083084, "bacc": 0.40453296703296704, "bacc_std": 0.06361587115398405} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 22, "C": 0.3593813663804626, "split": "test", "acc": 0.3076923076923077, "acc_std": 0.06228888605075377, "f1": 0.32311499479018657, "f1_std": 0.06229511396758932, "bacc": 0.30860805860805857, "bacc_std": 0.06235288844389938} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 23, "C": 2.782559402207126, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06004884599495185, "f1": 0.4237169312169312, "f1_std": 0.05817857687124815, "bacc": 0.42536630036630035, "bacc_std": 0.06060096521895294} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 24, "C": 0.005994842503189409, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06625523313037064, "f1": 0.4267741935483871, "f1_std": 0.06579289452816635, "bacc": 0.4212454212454212, "bacc_std": 0.06619021690247019} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 25, "C": 0.005994842503189409, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.06271088681755177, "f1": 0.35775162337662336, "f1_std": 0.06315124341121144, "bacc": 0.3617216117216117, "bacc_std": 0.06231651483009279} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 26, "C": 0.000774263682681127, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06289696420741527, "f1": 0.3809693581662436, "f1_std": 0.05966986775048064, "bacc": 0.3985805860805861, "bacc_std": 0.062176427896008135} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 27, "C": 0.046415888336127774, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.060313517673911084, "f1": 0.357683976347989, "f1_std": 0.05746858091206502, "bacc": 0.37934981684981683, "bacc_std": 0.05939219525579258} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 28, "C": 0.3593813663804626, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06474688998950923, "f1": 0.390485312899106, "f1_std": 0.0640825388684165, "bacc": 0.4001831501831502, "bacc_std": 0.06434523346695864} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 29, "C": 9.999999999999999e-05, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.06434811942374681, "f1": 0.34853535353535353, "f1_std": 0.060751513516429295, "bacc": 0.3601190476190476, "bacc_std": 0.0634981557801382} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 30, "C": 0.046415888336127774, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06651123275688277, "f1": 0.5371799337316578, "f1_std": 0.06594600536879391, "bacc": 0.5409798534798534, "bacc_std": 0.06667929524806138} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 31, "C": 9.999999999999999e-05, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.05878366465923362, "f1": 0.42183908045977014, "f1_std": 0.04909800992024476, "bacc": 0.47115384615384615, "bacc_std": 0.05730733013276767} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 32, "C": 0.005994842503189409, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06200801627886702, "f1": 0.4472723955482576, "f1_std": 0.06214717612657631, "bacc": 0.4448260073260073, "bacc_std": 0.06229503999114824} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 33, "C": 0.046415888336127774, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06826479942281151, "f1": 0.4767130857648099, "f1_std": 0.0689634948773413, "bacc": 0.4819139194139194, "bacc_std": 0.06868282550850108} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 34, "C": 2.782559402207126, "split": "test", "acc": 0.3269230769230769, "acc_std": 0.05915584672019692, "f1": 0.32964132641551996, "f1_std": 0.05850913892679913, "bacc": 0.3351648351648352, "bacc_std": 0.0601171555418695} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 35, "C": 0.005994842503189409, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06678999809966944, "f1": 0.40172875370243794, "f1_std": 0.0682856104774609, "bacc": 0.4017857142857143, "bacc_std": 0.06660701651241716} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 36, "C": 0.005994842503189409, "split": "test", "acc": 0.5, "acc_std": 0.06534671978747095, "f1": 0.49983852701244, "f1_std": 0.06476011572251777, "bacc": 0.5041208791208791, "bacc_std": 0.06543085447767505} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 37, "C": 0.3593813663804626, "split": "test", "acc": 0.3269230769230769, "acc_std": 0.061634768390545706, "f1": 0.32720797720797723, "f1_std": 0.06046770944022488, "bacc": 0.3232600732600733, "bacc_std": 0.061244413286401636} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 38, "C": 0.046415888336127774, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.07116527974665819, "f1": 0.4884408602150538, "f1_std": 0.07016692131037368, "bacc": 0.48054029304029305, "bacc_std": 0.07102644613450065} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 39, "C": 0.046415888336127774, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06473499689105737, "f1": 0.4541257974761811, "f1_std": 0.06656256458029676, "bacc": 0.4553571428571428, "bacc_std": 0.06493133104114582} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 40, "C": 9.999999999999999e-05, "split": "test", "acc": 0.5, "acc_std": 0.05463266248912721, "f1": 0.44041015910086123, "f1_std": 0.04643416251031891, "bacc": 0.49038461538461536, "bacc_std": 0.05314720017826937} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 41, "C": 0.046415888336127774, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.0620567000707308, "f1": 0.3989819004524887, "f1_std": 0.06418310860113063, "bacc": 0.42078754578754585, "bacc_std": 0.06162798762711347} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 42, "C": 0.005994842503189409, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.05821225953150516, "f1": 0.3626385336743393, "f1_std": 0.06066598807589091, "bacc": 0.3882783882783883, "bacc_std": 0.058921379819088744} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 43, "C": 9.999999999999999e-05, "split": "test", "acc": 0.5, "acc_std": 0.06300487207609776, "f1": 0.47578828828828823, "f1_std": 0.06634836354998894, "bacc": 0.4908424908424909, "bacc_std": 0.06259096113351144} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 44, "C": 0.046415888336127774, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.07017939818062052, "f1": 0.5350403045230632, "f1_std": 0.07220238250133405, "bacc": 0.5368589743589743, "bacc_std": 0.07032051949127911} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 45, "C": 0.005994842503189409, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.05997902739967238, "f1": 0.5059970014992504, "f1_std": 0.0622535995006364, "bacc": 0.5247252747252747, "bacc_std": 0.06052408581968997} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 46, "C": 0.046415888336127774, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.07019398312015204, "f1": 0.4409237688847884, "f1_std": 0.07046973153751124, "bacc": 0.4448260073260073, "bacc_std": 0.0702904414080819} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 47, "C": 0.005994842503189409, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06470793509226921, "f1": 0.43091695944956815, "f1_std": 0.06765968064269712, "bacc": 0.4432234432234432, "bacc_std": 0.06455046047334474} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 48, "C": 0.046415888336127774, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06524986113930224, "f1": 0.4703703703703703, "f1_std": 0.06790174350279524, "bacc": 0.48031135531135527, "bacc_std": 0.0653147660140734} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 49, "C": 0.046415888336127774, "split": "test", "acc": 0.3269230769230769, "acc_std": 0.059739808122860706, "f1": 0.32719576719576715, "f1_std": 0.05855071998933395, "bacc": 0.3289835164835165, "bacc_std": 0.06030015490426219} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 50, "C": 0.046415888336127774, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.0634354725023597, "f1": 0.3593906093906094, "f1_std": 0.06382634032985776, "bacc": 0.36767399267399264, "bacc_std": 0.06406081613463532} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 51, "C": 0.046415888336127774, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06367930004670175, "f1": 0.42927489177489175, "f1_std": 0.058603791013765505, "bacc": 0.4356684981684981, "bacc_std": 0.06261843462506654} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 52, "C": 0.005994842503189409, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06834999556321218, "f1": 0.4397433899537686, "f1_std": 0.06905768376326123, "bacc": 0.44345238095238093, "bacc_std": 0.0686042909083016} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 53, "C": 0.046415888336127774, "split": "test", "acc": 0.5769230769230769, "acc_std": 0.07109535017543726, "f1": 0.5811100131752305, "f1_std": 0.0710510999704213, "bacc": 0.57257326007326, "bacc_std": 0.07141983363832882} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 54, "C": 2.782559402207126, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06643334293300644, "f1": 0.42666666666666664, "f1_std": 0.0656952207959308, "bacc": 0.42559523809523814, "bacc_std": 0.06688456766964754} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 55, "C": 0.005994842503189409, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.06022762542144033, "f1": 0.5466829466829467, "f1_std": 0.0634633073882868, "bacc": 0.5588369963369964, "bacc_std": 0.0605641729647672} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 56, "C": 0.005994842503189409, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.06943639335315084, "f1": 0.384995894909688, "f1_std": 0.0691004815798914, "bacc": 0.38118131868131866, "bacc_std": 0.06926009017946293} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 57, "C": 9.999999999999999e-05, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.05997060423891913, "f1": 0.4009852216748768, "f1_std": 0.04921538398003282, "bacc": 0.45329670329670335, "bacc_std": 0.058404267370179876} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 58, "C": 0.005994842503189409, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06065573627609565, "f1": 0.4435483870967742, "f1_std": 0.06552940726551046, "bacc": 0.459478021978022, "bacc_std": 0.06033877770808076} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 59, "C": 0.005994842503189409, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06843231874283313, "f1": 0.3974823485693051, "f1_std": 0.06820876584531288, "bacc": 0.4017857142857143, "bacc_std": 0.06829254337332072} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 60, "C": 0.046415888336127774, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06455504279524457, "f1": 0.40275132275132275, "f1_std": 0.06452840428091523, "bacc": 0.4107142857142857, "bacc_std": 0.06551144468950743} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 61, "C": 0.046415888336127774, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06798933957509799, "f1": 0.46467032967032973, "f1_std": 0.06752144155407422, "bacc": 0.4626831501831502, "bacc_std": 0.06822601449907856} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 62, "C": 0.005994842503189409, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06799402819235292, "f1": 0.5325091575091575, "f1_std": 0.06773784925236269, "bacc": 0.5352564102564102, "bacc_std": 0.06773973188322224} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 63, "C": 0.005994842503189409, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.05919603127189521, "f1": 0.4394345238095238, "f1_std": 0.055605930916837076, "bacc": 0.4548992673992674, "bacc_std": 0.05834491941483136} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 64, "C": 0.005994842503189409, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.06205127675354926, "f1": 0.3751086130118388, "f1_std": 0.06034537229970619, "bacc": 0.3782051282051282, "bacc_std": 0.061171968033065176} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 65, "C": 166.81005372000556, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06492640348750642, "f1": 0.41180213464696225, "f1_std": 0.06617747732300965, "bacc": 0.4210164835164835, "bacc_std": 0.06504778586298834} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 66, "C": 0.005994842503189409, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06741270928230866, "f1": 0.43229166666666674, "f1_std": 0.06727913700493544, "bacc": 0.42422161172161177, "bacc_std": 0.0675368110694786} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 67, "C": 0.005994842503189409, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06299125279746452, "f1": 0.5184920634920634, "f1_std": 0.07052700780165627, "bacc": 0.5393772893772893, "bacc_std": 0.06333800098844283} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 68, "C": 0.046415888336127774, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06104701602601451, "f1": 0.4492655620241827, "f1_std": 0.06275866104478219, "bacc": 0.4608516483516484, "bacc_std": 0.060818628693039875} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 69, "C": 0.000774263682681127, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.05990075618651218, "f1": 0.34044027093596063, "f1_std": 0.0512899064682967, "bacc": 0.37774725274725274, "bacc_std": 0.05835923222948263} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 70, "C": 2.782559402207126, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06703619970587472, "f1": 0.4212454212454212, "f1_std": 0.06733962857330324, "bacc": 0.42261904761904756, "bacc_std": 0.06717958962717031} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 71, "C": 0.005994842503189409, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06719541184322829, "f1": 0.4781537802527308, "f1_std": 0.06887993326505827, "bacc": 0.4819139194139194, "bacc_std": 0.06758399755069669} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 72, "C": 21.54434690031882, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06504230530423981, "f1": 0.39736024844720497, "f1_std": 0.06504647222487471, "bacc": 0.4004120879120879, "bacc_std": 0.06467064249926671} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 73, "C": 0.005994842503189409, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06548701257935077, "f1": 0.5380952380952381, "f1_std": 0.06616687403765777, "bacc": 0.5368589743589743, "bacc_std": 0.06550393507828965} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 74, "C": 0.005994842503189409, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.062137278827756395, "f1": 0.4556451612903225, "f1_std": 0.06389322609951008, "bacc": 0.4610805860805861, "bacc_std": 0.06220597971904111} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 75, "C": 0.005994842503189409, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06725488037987461, "f1": 0.5298534798534799, "f1_std": 0.06919179578193468, "bacc": 0.5352564102564102, "bacc_std": 0.06708739798542818} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 76, "C": 0.005994842503189409, "split": "test", "acc": 0.5, "acc_std": 0.06637935624056053, "f1": 0.49935861562306405, "f1_std": 0.06807193242922643, "bacc": 0.4981684981684982, "bacc_std": 0.06654080635386729} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 77, "C": 0.046415888336127774, "split": "test", "acc": 0.5, "acc_std": 0.0691320235953681, "f1": 0.5014688759516346, "f1_std": 0.06988077873142089, "bacc": 0.49977106227106227, "bacc_std": 0.06937151922362128} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 78, "C": 0.046415888336127774, "split": "test", "acc": 0.5, "acc_std": 0.07239899105184715, "f1": 0.5039010989010989, "f1_std": 0.07217295931440829, "bacc": 0.5027472527472527, "bacc_std": 0.07270306661751606} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 79, "C": 0.046415888336127774, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.062416287132512814, "f1": 0.3760262725779967, "f1_std": 0.0638259470453883, "bacc": 0.3825549450549451, "bacc_std": 0.062347247343594175} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 80, "C": 0.005994842503189409, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.0642487192790182, "f1": 0.43763736263736264, "f1_std": 0.06490923467663018, "bacc": 0.4592490842490842, "bacc_std": 0.06400773653712143} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 81, "C": 0.046415888336127774, "split": "test", "acc": 0.5, "acc_std": 0.06556987329577257, "f1": 0.5017717789456919, "f1_std": 0.06624935497303235, "bacc": 0.5027472527472527, "bacc_std": 0.06589529603938821} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 82, "C": 0.046415888336127774, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.0670881694876757, "f1": 0.4732175925925926, "f1_std": 0.06904602513776205, "bacc": 0.48031135531135527, "bacc_std": 0.0670650395332144} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 83, "C": 2.782559402207126, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.06419804555634415, "f1": 0.37195616883116883, "f1_std": 0.06390664280665741, "bacc": 0.36652930402930406, "bacc_std": 0.06453481998065173} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 84, "C": 0.046415888336127774, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06249112363003548, "f1": 0.5356276289780126, "f1_std": 0.06429364092482233, "bacc": 0.5338827838827839, "bacc_std": 0.06252349907763052} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 85, "C": 0.046415888336127774, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06004066669187422, "f1": 0.4293912427950248, "f1_std": 0.06037655230998462, "bacc": 0.44024725274725274, "bacc_std": 0.05987147390114897} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 86, "C": 0.046415888336127774, "split": "test", "acc": 0.5961538461538461, "acc_std": 0.06318296667699697, "f1": 0.5872947454844006, "f1_std": 0.06707402349579383, "bacc": 0.594551282051282, "bacc_std": 0.06347956881139816} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 87, "C": 0.046415888336127774, "split": "test", "acc": 0.5, "acc_std": 0.0685825529420372, "f1": 0.4977193942711184, "f1_std": 0.06966483313801954, "bacc": 0.5011446886446886, "bacc_std": 0.0687655165752896} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 88, "C": 9.999999999999999e-05, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.062133886265750414, "f1": 0.3672577996715928, "f1_std": 0.06443395991380554, "bacc": 0.3630952380952381, "bacc_std": 0.0619940751230195} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 89, "C": 0.005994842503189409, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06488743097260712, "f1": 0.3979793833242109, "f1_std": 0.06548820994966015, "bacc": 0.4017857142857143, "bacc_std": 0.06472372130519154} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 90, "C": 9.999999999999999e-05, "split": "test", "acc": 0.5, "acc_std": 0.060633673051303, "f1": 0.4821001877453491, "f1_std": 0.0614229528228039, "bacc": 0.49633699633699635, "bacc_std": 0.060272415046333176} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 91, "C": 0.000774263682681127, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06624006010059304, "f1": 0.4420980262131378, "f1_std": 0.06994111915377038, "bacc": 0.4608516483516484, "bacc_std": 0.06621558216525715} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 92, "C": 0.3593813663804626, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06869849873039913, "f1": 0.509158615136876, "f1_std": 0.07182624442984537, "bacc": 0.5203754578754579, "bacc_std": 0.06902399008228371} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 93, "C": 0.046415888336127774, "split": "test", "acc": 0.34615384615384615, "acc_std": 0.052826205506033466, "f1": 0.3232976314872867, "f1_std": 0.05099056347859926, "bacc": 0.34226190476190477, "bacc_std": 0.05200557331540366} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 94, "C": 9.999999999999999e-05, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.05859260358839051, "f1": 0.36620670995670995, "f1_std": 0.0603907605172631, "bacc": 0.39720695970695974, "bacc_std": 0.057529726545082385} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 95, "C": 0.005994842503189409, "split": "test", "acc": 0.5, "acc_std": 0.06301939215021236, "f1": 0.4875899962106859, "f1_std": 0.06286901645579311, "bacc": 0.4965659340659341, "bacc_std": 0.06263604936950677} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 96, "C": 0.046415888336127774, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.062227346107240476, "f1": 0.3859447004608295, "f1_std": 0.0619538480792982, "bacc": 0.38713369963369965, "bacc_std": 0.06253333821332652} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 97, "C": 0.005994842503189409, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06486426428369545, "f1": 0.41858974358974355, "f1_std": 0.06362208792538525, "bacc": 0.4223901098901099, "bacc_std": 0.06471972336270713} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 98, "C": 0.046415888336127774, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06736383372904602, "f1": 0.4535035366931919, "f1_std": 0.06810922398770386, "bacc": 0.4610805860805861, "bacc_std": 0.06746827878366458} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 99, "C": 21.54434690031882, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06696532706995313, "f1": 0.47916666666666674, "f1_std": 0.06720532961517768, "bacc": 0.48328754578754574, "bacc_std": 0.06733063586148494} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 100, "C": 0.046415888336127774, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.06512140460154485, "f1": 0.3701539855072464, "f1_std": 0.06596356107803629, "bacc": 0.36652930402930406, "bacc_std": 0.0650708943151534} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | aabc_age | train | 100 | 2.3044 | 16.898 | 0.76906 | 0.15034 | 0.76481 | 0.15781 | 0.76876 | 0.15111 | +| flat_mae | patch | logistic | aabc_age | test | 100 | 2.3044 | 16.898 | 0.45173 | 0.064015 | 0.44272 | 0.064637 | 0.45048 | 0.064005 | + + +done! total time: 0:05:31 diff --git a/data_scaling/n1600_1/eval_v2/aabc_sex__patch__logistic/config.yaml b/data_scaling/n1600_1/eval_v2/aabc_sex__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..23e03ac6126dfb0a770d5df876030c1e82eab401 --- /dev/null +++ b/data_scaling/n1600_1/eval_v2/aabc_sex__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n1600_1; eval v2 (aabc_sex patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n1600_1/eval_v2/aabc_sex__patch__logistic +model: flat_mae +representation: patch +dataset: aabc_sex +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/aabc_sex__patch__logistic +remote_dir: null diff --git a/data_scaling/n1600_1/eval_v2/aabc_sex__patch__logistic/eval_table.csv b/data_scaling/n1600_1/eval_v2/aabc_sex__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..5971ebc64e7376ef5e6855471ba7b7f4bdfe2170 --- /dev/null +++ b/data_scaling/n1600_1/eval_v2/aabc_sex__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,aabc_sex,,0.3593813663804626,train,0.9905482041587902,0.004188331238116485,0.9903381466044704,0.00427237663297526,0.9912104800936768,0.003908802732771462 +flat_mae,patch,logistic,aabc_sex,,0.3593813663804626,test,0.9636363636363636,0.025035406332790652,0.9626358695652174,0.025353011679456864,0.9696969696969697,0.020862838610658855 +flat_mae,patch,logistic,aabc_sex,1,0.046415888336127774,train,0.947069943289225,0.010150154304871008,0.9453818696716718,0.010550057936558768,0.9426932207860723,0.011107771566807152 +flat_mae,patch,logistic,aabc_sex,1,0.046415888336127774,test,0.8181818181818182,0.05199937698291747,0.8151881720430108,0.052766511852328744,0.8192934782608696,0.052299282119432 +flat_mae,patch,logistic,aabc_sex,2,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,2,2.782559402207126,test,0.9090909090909091,0.03942971988129456,0.9071259709557582,0.0402703007695785,0.9096467391304348,0.040006773036680716 +flat_mae,patch,logistic,aabc_sex,3,0.3593813663804626,train,0.994328922495274,0.0033052107034787204,0.9941893034853195,0.00338405446495056,0.9944898736774231,0.0032516989990078143 +flat_mae,patch,logistic,aabc_sex,3,0.3593813663804626,test,0.7818181818181819,0.05799300043258283,0.7758152173913043,0.059816896096389016,0.7758152173913043,0.0598727041432624 +flat_mae,patch,logistic,aabc_sex,4,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,4,21.54434690031882,test,0.8181818181818182,0.051276324825432866,0.8176392572944298,0.051163831855481824,0.8315217391304348,0.047817871758453624 +flat_mae,patch,logistic,aabc_sex,5,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,5,21.54434690031882,test,0.8181818181818182,0.05178820185370088,0.8106060606060606,0.05441675829108289,0.8070652173913043,0.05414839896590889 +flat_mae,patch,logistic,aabc_sex,6,0.046415888336127774,train,0.9395085066162571,0.01029689657590648,0.9374926149119698,0.010737430881859416,0.9343327764588646,0.011351161666258157 +flat_mae,patch,logistic,aabc_sex,6,0.046415888336127774,test,0.9454545454545454,0.030464720145665536,0.9435897435897436,0.031856996515590215,0.9408967391304348,0.03345344386828878 +flat_mae,patch,logistic,aabc_sex,7,0.046415888336127774,train,0.945179584120983,0.0097371657601043,0.9434697855750487,0.010086064195743526,0.9410592338579677,0.010479310984652783 +flat_mae,patch,logistic,aabc_sex,7,0.046415888336127774,test,0.8727272727272727,0.04419335427395696,0.8683760683760684,0.046336155470605274,0.8661684782608696,0.047050180221596266 +flat_mae,patch,logistic,aabc_sex,8,0.005994842503189409,train,0.9092627599243857,0.011956170709970725,0.9061057862974795,0.012435853522113354,0.9027154957648231,0.012683663237009517 +flat_mae,patch,logistic,aabc_sex,8,0.005994842503189409,test,0.8363636363636363,0.0482003532101869,0.8328267477203647,0.04925630976767027,0.8349184782608696,0.0494145761527003 +flat_mae,patch,logistic,aabc_sex,9,0.046415888336127774,train,0.9546313799621928,0.009498654356783485,0.9533701592525121,0.009790086831835434,0.9522699961898062,0.010096251250045686 +flat_mae,patch,logistic,aabc_sex,9,0.046415888336127774,test,0.8727272727272727,0.04613563904491454,0.8683760683760684,0.048134896265990525,0.8661684782608696,0.04862276659367932 +flat_mae,patch,logistic,aabc_sex,10,0.005994842503189409,train,0.9054820415879017,0.012641860007147175,0.9021935273932079,0.01319797148444984,0.8988393563703508,0.013561658046254194 +flat_mae,patch,logistic,aabc_sex,10,0.005994842503189409,test,0.8545454545454545,0.04786946437798013,0.8484848484848485,0.05079627292231832,0.8444293478260869,0.05113769010411776 +flat_mae,patch,logistic,aabc_sex,11,0.046415888336127774,train,0.941398865784499,0.010239048026796852,0.939571150097466,0.010620526906568485,0.9371830944634953,0.011002834591063708 +flat_mae,patch,logistic,aabc_sex,11,0.046415888336127774,test,0.9272727272727272,0.03661195216486434,0.9252717391304348,0.03771518057012766,0.9252717391304348,0.03802982215594783 +flat_mae,patch,logistic,aabc_sex,12,0.3593813663804626,train,0.996219281663516,0.002744653091645616,0.9961190832526338,0.002822858311714484,0.9955156950672646,0.0032554293396424515 +flat_mae,patch,logistic,aabc_sex,12,0.3593813663804626,test,0.7818181818181819,0.055322214688283466,0.7727272727272727,0.058789336337662784,0.7697010869565217,0.05838419559668523 +flat_mae,patch,logistic,aabc_sex,13,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,13,2.782559402207126,test,0.8727272727272727,0.04483875242475267,0.8683760683760684,0.04644283901059012,0.8661684782608696,0.04642564110533098 +flat_mae,patch,logistic,aabc_sex,14,0.3593813663804626,train,0.994328922495274,0.003132011967646978,0.9941822314276811,0.003215745913053728,0.99388170813916,0.003407654953867839 +flat_mae,patch,logistic,aabc_sex,14,0.3593813663804626,test,0.9272727272727272,0.033057145612303836,0.9266666666666667,0.032968321281559826,0.9375,0.0284084845105736 +flat_mae,patch,logistic,aabc_sex,15,0.046415888336127774,train,0.945179584120983,0.009721869280138046,0.9434697855750487,0.010109176684731875,0.9410592338579677,0.010641944600358525 +flat_mae,patch,logistic,aabc_sex,15,0.046415888336127774,test,0.8727272727272727,0.04467781448815525,0.8699763593380614,0.045720592378123606,0.8722826086956521,0.04575159534606618 +flat_mae,patch,logistic,aabc_sex,16,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,16,166.81005372000556,test,0.8727272727272727,0.04376361747931758,0.8639095086603039,0.049347647260052904,0.8539402173913043,0.04981773616200069 +flat_mae,patch,logistic,aabc_sex,17,0.046415888336127774,train,0.945179584120983,0.009808005352420377,0.9435455084069022,0.010152007721254687,0.9416673993962308,0.010556661501019936 +flat_mae,patch,logistic,aabc_sex,17,0.046415888336127774,test,0.8545454545454545,0.046291508958478444,0.84593837535014,0.050932976570092874,0.8383152173913043,0.05095552319455272 +flat_mae,patch,logistic,aabc_sex,18,0.005994842503189409,train,0.8998109640831758,0.013242609705467801,0.896250328415428,0.013850707050975844,0.8927210645095108,0.014228486552302966 +flat_mae,patch,logistic,aabc_sex,18,0.005994842503189409,test,0.8727272727272727,0.04425290203865701,0.8711943793911007,0.04443531354761817,0.8783967391304348,0.04283113976051379 +flat_mae,patch,logistic,aabc_sex,19,0.3593813663804626,train,0.9886578449905482,0.004542177100827011,0.9883855386416862,0.004645154075266663,0.9889797473548463,0.0044849529410591095 +flat_mae,patch,logistic,aabc_sex,19,0.3593813663804626,test,0.8909090909090909,0.040495623679480265,0.8891129032258065,0.04092174142487251,0.8940217391304348,0.0399339461506134 +flat_mae,patch,logistic,aabc_sex,20,0.3593813663804626,train,0.9924385633270322,0.0037577069078246162,0.9922477212110554,0.0038525052433469333,0.9922477212110554,0.0038775170237524232 +flat_mae,patch,logistic,aabc_sex,20,0.3593813663804626,test,0.8545454545454545,0.0464259260712185,0.8484848484848485,0.04937434624824649,0.8444293478260869,0.049726421696210533 +flat_mae,patch,logistic,aabc_sex,21,0.046415888336127774,train,0.9546313799621928,0.00892300469533547,0.9533097969991173,0.009217987259882012,0.9516618306515432,0.009580595248655286 +flat_mae,patch,logistic,aabc_sex,21,0.046415888336127774,test,0.8181818181818182,0.04873936078013535,0.8106060606060606,0.05176715961049238,0.8070652173913043,0.0519198730634737 +flat_mae,patch,logistic,aabc_sex,22,0.005994842503189409,train,0.8998109640831758,0.01233538662900395,0.896250328415428,0.012841992892186167,0.8927210645095108,0.01302940897977597 +flat_mae,patch,logistic,aabc_sex,22,0.005994842503189409,test,0.9454545454545454,0.030531685208204498,0.9442755825734549,0.031119085664288105,0.9470108695652174,0.030249032194588295 +flat_mae,patch,logistic,aabc_sex,23,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,23,2.782559402207126,test,0.8727272727272727,0.045079236025704295,0.8711943793911007,0.045326019952044244,0.8783967391304348,0.04402196633395475 +flat_mae,patch,logistic,aabc_sex,24,0.046415888336127774,train,0.9376181474480151,0.010621853312036428,0.935672514619883,0.011013413837129348,0.9333069550690232,0.011400537057465715 +flat_mae,patch,logistic,aabc_sex,24,0.046415888336127774,test,0.8727272727272727,0.042659908211282405,0.8639095086603039,0.04810423536346599,0.8539402173913043,0.04835060225729434 +flat_mae,patch,logistic,aabc_sex,25,0.046415888336127774,train,0.9338374291115312,0.010617250019540036,0.9316802273020792,0.01104970742615337,0.9288226501362877,0.011472118329509296 +flat_mae,patch,logistic,aabc_sex,25,0.046415888336127774,test,0.9818181818181818,0.01828349750435446,0.9814251941911516,0.018512548584299526,0.984375,0.015712380667804608 +flat_mae,patch,logistic,aabc_sex,26,0.005994842503189409,train,0.9073724007561437,0.012296450886042351,0.904218013856813,0.012768161726679656,0.9010815088367186,0.012964731037646553 +flat_mae,patch,logistic,aabc_sex,26,0.005994842503189409,test,0.8,0.04986560450246936,0.7931623931623932,0.052388615428699485,0.7914402173913043,0.05247020282758435 +flat_mae,patch,logistic,aabc_sex,27,0.046415888336127774,train,0.947069943289225,0.0094872220783422,0.945455884519075,0.009829957755162471,0.9433013863243354,0.01026153787419479 +flat_mae,patch,logistic,aabc_sex,27,0.046415888336127774,test,0.9090909090909091,0.03651966525604011,0.905982905982906,0.03804541247166257,0.9035326086956521,0.03859428091845315 +flat_mae,patch,logistic,aabc_sex,28,0.046415888336127774,train,0.9376181474480151,0.010294984195719992,0.935672514619883,0.010671530585557969,0.9333069550690232,0.011049476630923863 +flat_mae,patch,logistic,aabc_sex,28,0.046415888336127774,test,0.8909090909090909,0.03985326806181159,0.884453781512605,0.04432569604070248,0.8756793478260869,0.04554873268887791 +flat_mae,patch,logistic,aabc_sex,29,0.046415888336127774,train,0.941398865784499,0.01026512025159384,0.9396520951935851,0.010618541359418709,0.9377912600017586,0.010966079232861118 +flat_mae,patch,logistic,aabc_sex,29,0.046415888336127774,test,0.8909090909090909,0.040856596622862576,0.8879076086956521,0.041930947794341485,0.8879076086956521,0.04188844943408443 +flat_mae,patch,logistic,aabc_sex,30,0.3593813663804626,train,0.9867674858223062,0.004923840991037784,0.9864417081324122,0.005041467258845173,0.9867375948884786,0.004981620164500961 +flat_mae,patch,logistic,aabc_sex,30,0.3593813663804626,test,0.8545454545454545,0.04877880585691335,0.8505434782608696,0.05030690166366289,0.8505434782608696,0.050303162916787776 +flat_mae,patch,logistic,aabc_sex,31,0.046415888336127774,train,0.9508506616257089,0.009507709099281772,0.9493518927677125,0.009852653193054636,0.9471775257188078,0.010287330950512574 +flat_mae,patch,logistic,aabc_sex,31,0.046415888336127774,test,0.8727272727272727,0.04179843011801493,0.8683760683760684,0.043371503942611855,0.8661684782608696,0.043328127678875075 +flat_mae,patch,logistic,aabc_sex,32,0.046415888336127774,train,0.941398865784499,0.010594463274627444,0.9396520951935851,0.010968472414919066,0.9377912600017586,0.011365547224346996 +flat_mae,patch,logistic,aabc_sex,32,0.046415888336127774,test,0.9454545454545454,0.02937720485834649,0.9427282193682749,0.03203095297824208,0.9347826086956521,0.035124918852370804 +flat_mae,patch,logistic,aabc_sex,33,0.046415888336127774,train,0.9395085066162571,0.009839637430874496,0.9375792796247677,0.010216691191222196,0.9349409419971277,0.010636524013520627 +flat_mae,patch,logistic,aabc_sex,33,0.046415888336127774,test,0.9272727272727272,0.035227243401747335,0.9252717391304348,0.036226000698534655,0.9252717391304348,0.03630914016631629 +flat_mae,patch,logistic,aabc_sex,34,0.3593813663804626,train,0.996219281663516,0.00267001170746521,0.9961285128805621,0.0027292789496098867,0.9967320261437909,0.0023079022765508037 +flat_mae,patch,logistic,aabc_sex,34,0.3593813663804626,test,0.9090909090909091,0.03910487699710457,0.905982905982906,0.040929061822991615,0.9035326086956521,0.04191966272093624 +flat_mae,patch,logistic,aabc_sex,35,0.046415888336127774,train,0.9527410207939508,0.009292641896713198,0.9512670565302144,0.009641742915045213,0.9488115126469123,0.010107560452117394 +flat_mae,patch,logistic,aabc_sex,35,0.046415888336127774,test,0.8363636363636363,0.048827725493406936,0.8307692307692308,0.05082034320191659,0.8288043478260869,0.05087665539806777 +flat_mae,patch,logistic,aabc_sex,36,0.046415888336127774,train,0.941398865784499,0.01002910206050626,0.939571150097466,0.010409441401084707,0.9371830944634953,0.010836711189868 +flat_mae,patch,logistic,aabc_sex,36,0.046415888336127774,test,0.9454545454545454,0.030335555877768643,0.9435897435897436,0.03179997826027416,0.9408967391304348,0.03338938655045195 +flat_mae,patch,logistic,aabc_sex,37,0.046415888336127774,train,0.947069943289225,0.009920434175993968,0.9455280964989703,0.010252892767958642,0.9439095518625986,0.010618988220817863 +flat_mae,patch,logistic,aabc_sex,37,0.046415888336127774,test,0.7818181818181819,0.05674679151377139,0.7758152173913043,0.05884026222434332,0.7758152173913043,0.058889398980197925 +flat_mae,patch,logistic,aabc_sex,38,0.046415888336127774,train,0.947069943289225,0.009384638542215508,0.9455280964989703,0.009698743670830947,0.9439095518625986,0.010047582983041889 +flat_mae,patch,logistic,aabc_sex,38,0.046415888336127774,test,0.8909090909090909,0.03794168644443647,0.884453781512605,0.04160418530946225,0.8756793478260869,0.04271098790632444 +flat_mae,patch,logistic,aabc_sex,39,0.046415888336127774,train,0.943289224952741,0.010205908220496286,0.9415598762704375,0.010584083799991954,0.9394252469298632,0.011041328832199163 +flat_mae,patch,logistic,aabc_sex,39,0.046415888336127774,test,0.8545454545454545,0.044938981587433646,0.8521505376344086,0.04550341274726737,0.8566576086956521,0.04493712614557608 +flat_mae,patch,logistic,aabc_sex,40,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,40,21.54434690031882,test,0.8909090909090909,0.03711719211380655,0.884453781512605,0.04088340802428907,0.8756793478260869,0.0420822810085415 +flat_mae,patch,logistic,aabc_sex,41,0.3593813663804626,train,0.9924385633270322,0.0038259748782496373,0.9922570257611241,0.003912583158029781,0.9928558867493186,0.003674820748043055 +flat_mae,patch,logistic,aabc_sex,41,0.3593813663804626,test,0.9090909090909091,0.038172656909588966,0.905982905982906,0.03996491774990712,0.9035326086956521,0.04081259399315248 +flat_mae,patch,logistic,aabc_sex,42,0.046415888336127774,train,0.947069943289225,0.009750075305750639,0.9455280964989703,0.010081063642494788,0.9439095518625986,0.010456269789493928 +flat_mae,patch,logistic,aabc_sex,42,0.046415888336127774,test,0.8181818181818182,0.051705510680301194,0.8151881720430108,0.05234477943645995,0.8192934782608696,0.051923672064294144 +flat_mae,patch,logistic,aabc_sex,43,0.046415888336127774,train,0.9376181474480151,0.010149055819932874,0.935672514619883,0.010535475177660097,0.9333069550690232,0.0110409505511924 +flat_mae,patch,logistic,aabc_sex,43,0.046415888336127774,test,0.9090909090909091,0.038004766991123456,0.905982905982906,0.03962778022043684,0.9035326086956521,0.04032195341213158 +flat_mae,patch,logistic,aabc_sex,44,0.3593813663804626,train,0.9924385633270322,0.003741261021372127,0.9922381665052675,0.003847416885274459,0.9916395556727923,0.0041646869962411225 +flat_mae,patch,logistic,aabc_sex,44,0.3593813663804626,test,0.8545454545454545,0.049121929593025415,0.8521505376344086,0.04948824250400598,0.8566576086956521,0.04835060739258163 +flat_mae,patch,logistic,aabc_sex,45,0.046415888336127774,train,0.943289224952741,0.010363093787975342,0.9415598762704375,0.010749329700784665,0.9394252469298632,0.011222235123600802 +flat_mae,patch,logistic,aabc_sex,45,0.046415888336127774,test,0.9454545454545454,0.030032489294481417,0.9447975911676145,0.030043243348843153,0.953125,0.02580917048744496 +flat_mae,patch,logistic,aabc_sex,46,0.046415888336127774,train,0.941398865784499,0.010421115813768598,0.939571150097466,0.010810924691148213,0.9371830944634953,0.011237008195025688 +flat_mae,patch,logistic,aabc_sex,46,0.046415888336127774,test,0.9090909090909091,0.04007429464019412,0.9071259709557582,0.040785709665491825,0.9096467391304348,0.04019296831135633 +flat_mae,patch,logistic,aabc_sex,47,0.3593813663804626,train,0.996219281663516,0.0027785064690620943,0.9961285128805621,0.0028398504298588955,0.9967320261437909,0.002401682879303668 +flat_mae,patch,logistic,aabc_sex,47,0.3593813663804626,test,0.8545454545454545,0.048155999943709095,0.8521505376344086,0.04863319087527578,0.8566576086956521,0.04764090383626747 +flat_mae,patch,logistic,aabc_sex,48,0.046415888336127774,train,0.947069943289225,0.010228935209801494,0.9453818696716718,0.010622435306939118,0.9426932207860723,0.011090245687048587 +flat_mae,patch,logistic,aabc_sex,48,0.046415888336127774,test,0.9090909090909091,0.03762538267563007,0.905982905982906,0.03926288699538724,0.9035326086956521,0.04019737365367443 +flat_mae,patch,logistic,aabc_sex,49,0.3593813663804626,train,0.994328922495274,0.003206539535203644,0.9941893034853195,0.003282875709874798,0.9944898736774231,0.0031615150296049515 +flat_mae,patch,logistic,aabc_sex,49,0.3593813663804626,test,0.8727272727272727,0.04339027578914255,0.8699763593380614,0.04462501841670839,0.8722826086956521,0.044951091782018386 +flat_mae,patch,logistic,aabc_sex,50,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,50,2.782559402207126,test,0.8909090909090909,0.042812387177869074,0.8879076086956521,0.044085342647006996,0.8879076086956521,0.044172230104372906 +flat_mae,patch,logistic,aabc_sex,51,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,51,21.54434690031882,test,0.8363636363636363,0.04676475327381005,0.8281846581048247,0.05074839817762831,0.8226902173913043,0.05102871367373478 +flat_mae,patch,logistic,aabc_sex,52,0.046415888336127774,train,0.945179584120983,0.009752913112602913,0.9434697855750487,0.010105655980521187,0.9410592338579677,0.010478724278613459 +flat_mae,patch,logistic,aabc_sex,52,0.046415888336127774,test,0.9272727272727272,0.03399599393025281,0.9260752688172043,0.0343098380758264,0.9313858695652174,0.032395814857751795 +flat_mae,patch,logistic,aabc_sex,53,0.046415888336127774,train,0.945179584120983,0.010209263374211465,0.9434697855750487,0.010589935237358399,0.9410592338579677,0.01099622327413462 +flat_mae,patch,logistic,aabc_sex,53,0.046415888336127774,test,0.9090909090909091,0.03803912731138256,0.905982905982906,0.039907493917917175,0.9035326086956521,0.040854641480386325 +flat_mae,patch,logistic,aabc_sex,54,0.3593813663804626,train,0.996219281663516,0.0026514105184520177,0.9961285128805621,0.002710450304140908,0.9967320261437909,0.0022918237978122756 +flat_mae,patch,logistic,aabc_sex,54,0.3593813663804626,test,0.8545454545454545,0.04712450889763174,0.8521505376344086,0.04756457614139713,0.8566576086956521,0.04664406519520766 +flat_mae,patch,logistic,aabc_sex,55,0.046415888336127774,train,0.9508506616257089,0.008979504521952055,0.9493518927677125,0.009318002841507713,0.9471775257188078,0.009835163868504682 +flat_mae,patch,logistic,aabc_sex,55,0.046415888336127774,test,0.8545454545454545,0.046817387460163125,0.8533333333333333,0.046846669448954895,0.8627717391304348,0.045339533930937904 +flat_mae,patch,logistic,aabc_sex,56,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,56,2.782559402207126,test,0.7818181818181819,0.05311501320812386,0.7727272727272727,0.056537664830211276,0.7697010869565217,0.05632801668031634 +flat_mae,patch,logistic,aabc_sex,57,0.3593813663804626,train,0.9886578449905482,0.004591442324538005,0.9883715818165831,0.004708127826824685,0.9883715818165831,0.0047654281816676336 +flat_mae,patch,logistic,aabc_sex,57,0.3593813663804626,test,0.9090909090909091,0.03781277933439614,0.9071259709557582,0.03864297852212116,0.9096467391304348,0.038297675602074595 +flat_mae,patch,logistic,aabc_sex,58,0.3593813663804626,train,0.996219281663516,0.0027397533084088026,0.9961238606055277,0.00280881912066526,0.9961238606055277,0.0028289653515104055 +flat_mae,patch,logistic,aabc_sex,58,0.3593813663804626,test,0.7454545454545455,0.05835787000684258,0.741263440860215,0.05915510410184426,0.7445652173913043,0.059159229418867455 +flat_mae,patch,logistic,aabc_sex,59,0.3593813663804626,train,0.9886578449905482,0.004644869056496552,0.9883715818165831,0.004762711356293787,0.9883715818165831,0.004811597940722111 +flat_mae,patch,logistic,aabc_sex,59,0.3593813663804626,test,0.8909090909090909,0.04222230032374005,0.8879076086956521,0.04354949684334448,0.8879076086956521,0.043741166299477434 +flat_mae,patch,logistic,aabc_sex,60,0.005994842503189409,train,0.9035916824196597,0.012531476515827054,0.9004483312116013,0.013059409821893655,0.8978135349805094,0.01346614581964562 +flat_mae,patch,logistic,aabc_sex,60,0.005994842503189409,test,0.8545454545454545,0.047503715383184604,0.8505434782608696,0.04907652042257629,0.8505434782608696,0.04922417086899921 +flat_mae,patch,logistic,aabc_sex,61,0.005994842503189409,train,0.8998109640831758,0.013050506859472242,0.8963990762124712,0.01360482663954308,0.893329230047774,0.013952772000752966 +flat_mae,patch,logistic,aabc_sex,61,0.005994842503189409,test,0.9454545454545454,0.02974312060192233,0.9435897435897436,0.031127601279959827,0.9408967391304348,0.032602747872421796 +flat_mae,patch,logistic,aabc_sex,62,0.3593813663804626,train,0.9886578449905482,0.004649939280600034,0.9883715818165831,0.0047713290762452,0.9883715818165831,0.004915614898201004 +flat_mae,patch,logistic,aabc_sex,62,0.3593813663804626,test,0.8909090909090909,0.04201677744397896,0.8891129032258065,0.042627363369665626,0.8940217391304348,0.041925285278970534 +flat_mae,patch,logistic,aabc_sex,63,0.046415888336127774,train,0.947069943289225,0.009170513233760676,0.9453818696716718,0.00953643767404187,0.9426932207860723,0.010039847961227947 +flat_mae,patch,logistic,aabc_sex,63,0.046415888336127774,test,0.9454545454545454,0.03074187926096693,0.9435897435897436,0.03218661757865148,0.9408967391304348,0.03381087101101002 +flat_mae,patch,logistic,aabc_sex,64,0.3593813663804626,train,0.994328922495274,0.0031289732942671135,0.9941893034853195,0.0032034606226176037,0.9944898736774231,0.0030535076992374652 +flat_mae,patch,logistic,aabc_sex,64,0.3593813663804626,test,0.8545454545454545,0.04718160448366387,0.8521505376344086,0.0477635635461171,0.8566576086956521,0.04726567459749152 +flat_mae,patch,logistic,aabc_sex,65,0.005994842503189409,train,0.9073724007561437,0.01289209652382475,0.904352318222911,0.013427613276834912,0.9016896743749816,0.013831301854085149 +flat_mae,patch,logistic,aabc_sex,65,0.005994842503189409,test,0.8909090909090909,0.041619512270723365,0.884453781512605,0.04587094461014319,0.8756793478260869,0.046985063345289284 +flat_mae,patch,logistic,aabc_sex,66,0.046415888336127774,train,0.947069943289225,0.009450976145930948,0.9453818696716718,0.009801342536018616,0.9426932207860723,0.010159863913005539 +flat_mae,patch,logistic,aabc_sex,66,0.046415888336127774,test,0.8545454545454545,0.04374862831379792,0.84593837535014,0.048009776606613215,0.8383152173913043,0.04841079648013054 +flat_mae,patch,logistic,aabc_sex,67,0.005994842503189409,train,0.9054820415879017,0.013001067457967277,0.9021935273932079,0.013615499913489771,0.8988393563703508,0.014096624269394243 +flat_mae,patch,logistic,aabc_sex,67,0.005994842503189409,test,0.8727272727272727,0.04383860708410316,0.8683760683760684,0.04565633848505554,0.8661684782608696,0.04585107816009147 +flat_mae,patch,logistic,aabc_sex,68,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,68,2.782559402207126,test,0.9636363636363636,0.024389240277678075,0.9621212121212122,0.02602883135143526,0.9565217391304348,0.029161048158093342 +flat_mae,patch,logistic,aabc_sex,69,0.046415888336127774,train,0.9395085066162571,0.010681767163156159,0.9375792796247677,0.011088184830008108,0.9349409419971277,0.01151470266614371 +flat_mae,patch,logistic,aabc_sex,69,0.046415888336127774,test,0.9454545454545454,0.030658211794121714,0.9442755825734549,0.03125687663401937,0.9470108695652174,0.030483606025122685 +flat_mae,patch,logistic,aabc_sex,70,0.3593813663804626,train,0.9924385633270322,0.0037515776773035876,0.9922570257611241,0.0038379093286509502,0.9928558867493186,0.0036588441314807448 +flat_mae,patch,logistic,aabc_sex,70,0.3593813663804626,test,0.9090909090909091,0.03797302217658138,0.9079959852793577,0.038089775277604015,0.9157608695652174,0.035683234435983056 +flat_mae,patch,logistic,aabc_sex,71,0.046415888336127774,train,0.947069943289225,0.009609479395923038,0.9455280964989703,0.00993567959714765,0.9439095518625986,0.01031439792886803 +flat_mae,patch,logistic,aabc_sex,71,0.046415888336127774,test,0.8909090909090909,0.04026324944907387,0.8863636363636364,0.04253399477606413,0.8817934782608696,0.043186029009299264 +flat_mae,patch,logistic,aabc_sex,72,0.046415888336127774,train,0.947069943289225,0.00947481129774612,0.9455985191279309,0.009780835812537653,0.9445177174008617,0.010123350671182587 +flat_mae,patch,logistic,aabc_sex,72,0.046415888336127774,test,0.9454545454545454,0.02962072091354323,0.9435897435897436,0.030902111914584772,0.9408967391304348,0.032219871006114204 +flat_mae,patch,logistic,aabc_sex,73,0.046415888336127774,train,0.9395085066162571,0.010033599095150527,0.9376638680217999,0.010401116913529837,0.9355491075353908,0.010840671399597558 +flat_mae,patch,logistic,aabc_sex,73,0.046415888336127774,test,0.9272727272727272,0.03411744421846397,0.9229691876750701,0.037700977820631115,0.9130434782608696,0.04079259634816344 +flat_mae,patch,logistic,aabc_sex,74,0.005994842503189409,train,0.9073724007561437,0.012419840425901009,0.904218013856813,0.012977238308262131,0.9010815088367186,0.013398289320544536 +flat_mae,patch,logistic,aabc_sex,74,0.005994842503189409,test,0.9090909090909091,0.03945050670572994,0.905982905982906,0.04122527157904843,0.9035326086956521,0.04222734334658748 +flat_mae,patch,logistic,aabc_sex,75,0.3593813663804626,train,0.994328922495274,0.003174927865629017,0.9941893034853195,0.003251483923734603,0.9944898736774231,0.003161235568039599 +flat_mae,patch,logistic,aabc_sex,75,0.3593813663804626,test,0.9090909090909091,0.03646451461964469,0.9071259709557582,0.03727505265281507,0.9096467391304348,0.03708450931046663 +flat_mae,patch,logistic,aabc_sex,76,0.046415888336127774,train,0.947069943289225,0.009686864415908002,0.945455884519075,0.010063207618008435,0.9433013863243354,0.010636670333277285 +flat_mae,patch,logistic,aabc_sex,76,0.046415888336127774,test,0.8727272727272727,0.045906178125418404,0.8683760683760684,0.047644664732567094,0.8661684782608696,0.04759468109052786 +flat_mae,patch,logistic,aabc_sex,77,0.3593813663804626,train,0.994328922495274,0.0031877890669275996,0.9941822314276811,0.0032741038881082076,0.99388170813916,0.003508085818013284 +flat_mae,patch,logistic,aabc_sex,77,0.3593813663804626,test,0.8363636363636363,0.049396971834782324,0.8250265111346766,0.055676251898489286,0.8165760869565217,0.055072998163466086 +flat_mae,patch,logistic,aabc_sex,78,0.3593813663804626,train,0.9924385633270322,0.004030428394517791,0.9922570257611241,0.004121001603787466,0.9928558867493186,0.0038614527833024806 +flat_mae,patch,logistic,aabc_sex,78,0.3593813663804626,test,0.9454545454545454,0.02885071292643627,0.9435897435897436,0.03013725174734259,0.9408967391304348,0.03140929611670196 +flat_mae,patch,logistic,aabc_sex,79,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,79,2.782559402207126,test,0.8727272727272727,0.041494398705933085,0.8639095086603039,0.04675570033253851,0.8539402173913043,0.04738155065165143 +flat_mae,patch,logistic,aabc_sex,80,0.3593813663804626,train,0.996219281663516,0.0026985828910675957,0.9961285128805621,0.0027581549739984974,0.9967320261437909,0.0023325986100894694 +flat_mae,patch,logistic,aabc_sex,80,0.3593813663804626,test,0.8545454545454545,0.04615648553588966,0.84593837535014,0.050824396026882636,0.8383152173913043,0.051159712209321695 +flat_mae,patch,logistic,aabc_sex,81,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,81,166.81005372000556,test,0.9272727272727272,0.03488681107204928,0.9252717391304348,0.03590662223672223,0.9252717391304348,0.036149279317895314 +flat_mae,patch,logistic,aabc_sex,82,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,82,2.782559402207126,test,0.9272727272727272,0.031088229549604884,0.9229691876750701,0.03431580579124042,0.9130434782608696,0.03717070924409279 +flat_mae,patch,logistic,aabc_sex,83,0.046415888336127774,train,0.9489603024574669,0.00947511301570348,0.9474389216202193,0.009809999281281605,0.9455435387907032,0.010260527225678847 +flat_mae,patch,logistic,aabc_sex,83,0.046415888336127774,test,0.9090909090909091,0.03728356383490459,0.9027925061859314,0.04256693149965154,0.8913043478260869,0.044578174150429396 +flat_mae,patch,logistic,aabc_sex,84,0.3593813663804626,train,0.9924385633270322,0.0038926695335589623,0.9922477212110554,0.003992428193347294,0.9922477212110554,0.004063648005513318 +flat_mae,patch,logistic,aabc_sex,84,0.3593813663804626,test,0.8545454545454545,0.05075169174398062,0.8505434782608696,0.05238526513894739,0.8505434782608696,0.05266563538232953 +flat_mae,patch,logistic,aabc_sex,85,0.3593813663804626,train,0.9886578449905482,0.004761512682991843,0.9883572497579012,0.004895524075238207,0.98776341627832,0.00517105729345984 +flat_mae,patch,logistic,aabc_sex,85,0.3593813663804626,test,0.8909090909090909,0.041465564723650386,0.8879076086956521,0.04258211327389628,0.8879076086956521,0.04266989860721131 +flat_mae,patch,logistic,aabc_sex,86,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,86,166.81005372000556,test,0.9090909090909091,0.04126365939633638,0.905982905982906,0.043043129821068185,0.9035326086956521,0.04398301655093316 +flat_mae,patch,logistic,aabc_sex,87,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,87,166.81005372000556,test,0.8727272727272727,0.044858698263740934,0.8683760683760684,0.046786230604012075,0.8661684782608696,0.04715129024754165 +flat_mae,patch,logistic,aabc_sex,88,0.3593813663804626,train,0.9905482041587902,0.004174808470840796,0.9903155058088658,0.004275300718409526,0.9906137342829509,0.00423158554548896 +flat_mae,patch,logistic,aabc_sex,88,0.3593813663804626,test,0.9090909090909091,0.03982772820390711,0.9045470322804582,0.042893741659459604,0.8974184782608696,0.04467382717449169 +flat_mae,patch,logistic,aabc_sex,89,0.046415888336127774,train,0.945179584120983,0.010079904887718209,0.943691387252473,0.010381260024918126,0.9428837304727571,0.010638263722438573 +flat_mae,patch,logistic,aabc_sex,89,0.046415888336127774,test,0.8727272727272727,0.03977874344109006,0.8609606356085229,0.04769197901313258,0.8478260869565217,0.04756154107086856 +flat_mae,patch,logistic,aabc_sex,90,0.046415888336127774,train,0.9489603024574669,0.010126457686621035,0.9472961753473184,0.010540252012932471,0.9443272077141769,0.011143999204888122 +flat_mae,patch,logistic,aabc_sex,90,0.046415888336127774,test,0.8727272727272727,0.04332389640833335,0.8663658451926415,0.04694369727377224,0.8600543478260869,0.04763515066407978 +flat_mae,patch,logistic,aabc_sex,91,0.046415888336127774,train,0.947069943289225,0.010353650235137476,0.9455280964989703,0.010700244404276416,0.9439095518625986,0.01104874157042308 +flat_mae,patch,logistic,aabc_sex,91,0.046415888336127774,test,0.9090909090909091,0.03817066503771565,0.905982905982906,0.039868188273589907,0.9035326086956521,0.04056101864698278 +flat_mae,patch,logistic,aabc_sex,92,0.046415888336127774,train,0.9508506616257089,0.009565005190530083,0.9493518927677125,0.009904670402342624,0.9471775257188078,0.010338712314583436 +flat_mae,patch,logistic,aabc_sex,92,0.046415888336127774,test,0.8909090909090909,0.04182448964284319,0.8863636363636364,0.04433174394875053,0.8817934782608696,0.04525097274774546 +flat_mae,patch,logistic,aabc_sex,93,0.3593813663804626,train,0.994328922495274,0.003286179412677767,0.9941893034853195,0.0033638031414667644,0.9944898736774231,0.0032012965831938016 +flat_mae,patch,logistic,aabc_sex,93,0.3593813663804626,test,0.8727272727272727,0.040443557241098965,0.8639095086603039,0.04576511718702058,0.8539402173913043,0.04641230245421454 +flat_mae,patch,logistic,aabc_sex,94,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,94,2.782559402207126,test,0.8727272727272727,0.04495726529137797,0.8683760683760684,0.046844482679540854,0.8661684782608696,0.047150004663879676 +flat_mae,patch,logistic,aabc_sex,95,0.046415888336127774,train,0.9508506616257089,0.00923252346692333,0.9493518927677125,0.009575922812089506,0.9471775257188078,0.010072320425744633 +flat_mae,patch,logistic,aabc_sex,95,0.046415888336127774,test,0.8363636363636363,0.049351979202861426,0.8307692307692308,0.05148845711246185,0.8288043478260869,0.051683076424542534 +flat_mae,patch,logistic,aabc_sex,96,0.005994842503189409,train,0.9017013232514177,0.01272452079300834,0.8982812684889363,0.013307078082030242,0.8949632169758786,0.013690822202203107 +flat_mae,patch,logistic,aabc_sex,96,0.005994842503189409,test,0.9090909090909091,0.03760209243342585,0.9045470322804582,0.04058931587271754,0.8974184782608696,0.04220810073840855 +flat_mae,patch,logistic,aabc_sex,97,0.046415888336127774,train,0.9489603024574669,0.010002511110847382,0.9473684210526316,0.01038508763159377,0.9449353732524399,0.010894337273563053 +flat_mae,patch,logistic,aabc_sex,97,0.046415888336127774,test,0.9090909090909091,0.03705909509048514,0.905982905982906,0.03869099332324131,0.9035326086956521,0.039355652704487015 +flat_mae,patch,logistic,aabc_sex,98,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,98,2.782559402207126,test,0.9090909090909091,0.03698342715727575,0.9027925061859314,0.041888358739349874,0.8913043478260869,0.04421931507935143 +flat_mae,patch,logistic,aabc_sex,99,0.005994842503189409,train,0.9035916824196597,0.013312799919093447,0.9004483312116013,0.013823121058464193,0.8978135349805094,0.014088777843543945 +flat_mae,patch,logistic,aabc_sex,99,0.005994842503189409,test,0.8909090909090909,0.043499725466978285,0.884453781512605,0.04815895374305792,0.8756793478260869,0.049355247684443784 +flat_mae,patch,logistic,aabc_sex,100,0.3593813663804626,train,0.9886578449905482,0.004523703349669597,0.9883855386416862,0.004627137376630234,0.9889797473548463,0.004476861312665524 +flat_mae,patch,logistic,aabc_sex,100,0.3593813663804626,test,0.8727272727272727,0.043879624971087716,0.8711943793911007,0.04400777865124005,0.8783967391304348,0.042285790159173255 diff --git a/data_scaling/n1600_1/eval_v2/aabc_sex__patch__logistic/log.txt b/data_scaling/n1600_1/eval_v2/aabc_sex__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..4a19b11d89a9454d4eb9e772c5a7ad3c3931316e --- /dev/null +++ b/data_scaling/n1600_1/eval_v2/aabc_sex__patch__logistic/log.txt @@ -0,0 +1,245 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: 58906bf7243fb545e1349221e6921a1797e2e666, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-26 17:20:46 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n1600_1; eval v2 (aabc_sex patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n1600_1/eval_v2/aabc_sex__patch__logistic +model: flat_mae +representation: patch +dataset: aabc_sex +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/aabc_sex__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: aabc_sex (flat) +train (n=471): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 471 +}), + labels=[0 1], + counts=[269 202] +) + +validation (n=58): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 58 +}), + labels=[0 1], + counts=[36 22] +) + +test (n=55): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 55 +}), + labels=[0 1], + counts=[33 22] +) + +extracting features for all splits +extract (train) [ 0/236] eta: 0:18:41 time: 4.7528 data: 4.1739 max mem: 3205 +extract (train) [ 20/236] eta: 0:01:39 time: 0.2462 data: 0.0779 max mem: 3393 +extract (train) [ 40/236] eta: 0:01:05 time: 0.1970 data: 0.0563 max mem: 3393 +extract (train) [ 60/236] eta: 0:00:51 time: 0.2199 data: 0.0636 max mem: 3393 +extract (train) [ 80/236] eta: 0:00:42 time: 0.2113 data: 0.0647 max mem: 3393 +extract (train) [100/236] eta: 0:00:36 time: 0.2372 data: 0.0736 max mem: 3393 +extract (train) [120/236] eta: 0:00:29 time: 0.2118 data: 0.0610 max mem: 3393 +extract (train) [140/236] eta: 0:00:24 time: 0.2087 data: 0.0623 max mem: 3393 +extract (train) [160/236] eta: 0:00:18 time: 0.2107 data: 0.0602 max mem: 3393 +extract (train) [180/236] eta: 0:00:13 time: 0.2074 data: 0.0598 max mem: 3393 +extract (train) [200/236] eta: 0:00:08 time: 0.1970 data: 0.0560 max mem: 3393 +extract (train) [220/236] eta: 0:00:03 time: 0.1939 data: 0.0546 max mem: 3393 +extract (train) [235/236] eta: 0:00:00 time: 0.1647 data: 0.0440 max mem: 3393 +extract (train) Total time: 0:00:54 (0.2306 s / it) +extract (validation) [ 0/29] eta: 0:02:15 time: 4.6676 data: 4.4682 max mem: 3393 +extract (validation) [20/29] eta: 0:00:03 time: 0.1983 data: 0.0531 max mem: 3393 +extract (validation) [28/29] eta: 0:00:00 time: 0.1627 data: 0.0387 max mem: 3393 +extract (validation) Total time: 0:00:10 (0.3500 s / it) +extract (test) [ 0/28] eta: 0:01:59 time: 4.2576 data: 4.0889 max mem: 3393 +extract (test) [20/28] eta: 0:00:03 time: 0.1864 data: 0.0471 max mem: 3393 +extract (test) [27/28] eta: 0:00:00 time: 0.1613 data: 0.0371 max mem: 3393 +extract (test) Total time: 0:00:09 (0.3351 s / it) +feature extraction time: 0:01:14 +train features: (471, 768) +validation features: (58, 768) +test features: (55, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|--------:|:--------|--------:|----------:|--------:|----------:|--------:|-----------:| +| flat_mae | patch | logistic | aabc_sex | | 0.35938 | train | 0.99055 | 0.0041883 | 0.99034 | 0.0042724 | 0.99121 | 0.0039088 | +| flat_mae | patch | logistic | aabc_sex | | 0.35938 | test | 0.96364 | 0.025035 | 0.96264 | 0.025353 | 0.9697 | 0.020863 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 1, "C": 0.046415888336127774, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.05199937698291747, "f1": 0.8151881720430108, "f1_std": 0.052766511852328744, "bacc": 0.8192934782608696, "bacc_std": 0.052299282119432} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 2, "C": 2.782559402207126, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03942971988129456, "f1": 0.9071259709557582, "f1_std": 0.0402703007695785, "bacc": 0.9096467391304348, "bacc_std": 0.040006773036680716} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 3, "C": 0.3593813663804626, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.05799300043258283, "f1": 0.7758152173913043, "f1_std": 0.059816896096389016, "bacc": 0.7758152173913043, "bacc_std": 0.0598727041432624} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 4, "C": 21.54434690031882, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.051276324825432866, "f1": 0.8176392572944298, "f1_std": 0.051163831855481824, "bacc": 0.8315217391304348, "bacc_std": 0.047817871758453624} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 5, "C": 21.54434690031882, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.05178820185370088, "f1": 0.8106060606060606, "f1_std": 0.05441675829108289, "bacc": 0.8070652173913043, "bacc_std": 0.05414839896590889} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 6, "C": 0.046415888336127774, "split": "test", "acc": 0.9454545454545454, "acc_std": 0.030464720145665536, "f1": 0.9435897435897436, "f1_std": 0.031856996515590215, "bacc": 0.9408967391304348, "bacc_std": 0.03345344386828878} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 7, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04419335427395696, "f1": 0.8683760683760684, "f1_std": 0.046336155470605274, "bacc": 0.8661684782608696, "bacc_std": 0.047050180221596266} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 8, "C": 0.005994842503189409, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.0482003532101869, "f1": 0.8328267477203647, "f1_std": 0.04925630976767027, "bacc": 0.8349184782608696, "bacc_std": 0.0494145761527003} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 9, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04613563904491454, "f1": 0.8683760683760684, "f1_std": 0.048134896265990525, "bacc": 0.8661684782608696, "bacc_std": 0.04862276659367932} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 10, "C": 0.005994842503189409, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04786946437798013, "f1": 0.8484848484848485, "f1_std": 0.05079627292231832, "bacc": 0.8444293478260869, "bacc_std": 0.05113769010411776} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 11, "C": 0.046415888336127774, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.03661195216486434, "f1": 0.9252717391304348, "f1_std": 0.03771518057012766, "bacc": 0.9252717391304348, "bacc_std": 0.03802982215594783} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 12, "C": 0.3593813663804626, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.055322214688283466, "f1": 0.7727272727272727, "f1_std": 0.058789336337662784, "bacc": 0.7697010869565217, "bacc_std": 0.05838419559668523} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 13, "C": 2.782559402207126, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04483875242475267, "f1": 0.8683760683760684, "f1_std": 0.04644283901059012, "bacc": 0.8661684782608696, "bacc_std": 0.04642564110533098} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 14, "C": 0.3593813663804626, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.033057145612303836, "f1": 0.9266666666666667, "f1_std": 0.032968321281559826, "bacc": 0.9375, "bacc_std": 0.0284084845105736} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 15, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04467781448815525, "f1": 0.8699763593380614, "f1_std": 0.045720592378123606, "bacc": 0.8722826086956521, "bacc_std": 0.04575159534606618} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 16, "C": 166.81005372000556, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04376361747931758, "f1": 0.8639095086603039, "f1_std": 0.049347647260052904, "bacc": 0.8539402173913043, "bacc_std": 0.04981773616200069} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 17, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.046291508958478444, "f1": 0.84593837535014, "f1_std": 0.050932976570092874, "bacc": 0.8383152173913043, "bacc_std": 0.05095552319455272} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 18, "C": 0.005994842503189409, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04425290203865701, "f1": 0.8711943793911007, "f1_std": 0.04443531354761817, "bacc": 0.8783967391304348, "bacc_std": 0.04283113976051379} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 19, "C": 0.3593813663804626, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.040495623679480265, "f1": 0.8891129032258065, "f1_std": 0.04092174142487251, "bacc": 0.8940217391304348, "bacc_std": 0.0399339461506134} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 20, "C": 0.3593813663804626, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.0464259260712185, "f1": 0.8484848484848485, "f1_std": 0.04937434624824649, "bacc": 0.8444293478260869, "bacc_std": 0.049726421696210533} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 21, "C": 0.046415888336127774, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.04873936078013535, "f1": 0.8106060606060606, "f1_std": 0.05176715961049238, "bacc": 0.8070652173913043, "bacc_std": 0.0519198730634737} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 22, "C": 0.005994842503189409, "split": "test", "acc": 0.9454545454545454, "acc_std": 0.030531685208204498, "f1": 0.9442755825734549, "f1_std": 0.031119085664288105, "bacc": 0.9470108695652174, "bacc_std": 0.030249032194588295} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 23, "C": 2.782559402207126, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.045079236025704295, "f1": 0.8711943793911007, "f1_std": 0.045326019952044244, "bacc": 0.8783967391304348, "bacc_std": 0.04402196633395475} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 24, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.042659908211282405, "f1": 0.8639095086603039, "f1_std": 0.04810423536346599, "bacc": 0.8539402173913043, "bacc_std": 0.04835060225729434} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 25, "C": 0.046415888336127774, "split": "test", "acc": 0.9818181818181818, "acc_std": 0.01828349750435446, "f1": 0.9814251941911516, "f1_std": 0.018512548584299526, "bacc": 0.984375, "bacc_std": 0.015712380667804608} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 26, "C": 0.005994842503189409, "split": "test", "acc": 0.8, "acc_std": 0.04986560450246936, "f1": 0.7931623931623932, "f1_std": 0.052388615428699485, "bacc": 0.7914402173913043, "bacc_std": 0.05247020282758435} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 27, "C": 0.046415888336127774, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03651966525604011, "f1": 0.905982905982906, "f1_std": 0.03804541247166257, "bacc": 0.9035326086956521, "bacc_std": 0.03859428091845315} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 28, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.03985326806181159, "f1": 0.884453781512605, "f1_std": 0.04432569604070248, "bacc": 0.8756793478260869, "bacc_std": 0.04554873268887791} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 29, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.040856596622862576, "f1": 0.8879076086956521, "f1_std": 0.041930947794341485, "bacc": 0.8879076086956521, "bacc_std": 0.04188844943408443} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 30, "C": 0.3593813663804626, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04877880585691335, "f1": 0.8505434782608696, "f1_std": 0.05030690166366289, "bacc": 0.8505434782608696, "bacc_std": 0.050303162916787776} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 31, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04179843011801493, "f1": 0.8683760683760684, "f1_std": 0.043371503942611855, "bacc": 0.8661684782608696, "bacc_std": 0.043328127678875075} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 32, "C": 0.046415888336127774, "split": "test", "acc": 0.9454545454545454, "acc_std": 0.02937720485834649, "f1": 0.9427282193682749, "f1_std": 0.03203095297824208, "bacc": 0.9347826086956521, "bacc_std": 0.035124918852370804} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 33, "C": 0.046415888336127774, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.035227243401747335, "f1": 0.9252717391304348, "f1_std": 0.036226000698534655, "bacc": 0.9252717391304348, "bacc_std": 0.03630914016631629} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 34, "C": 0.3593813663804626, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03910487699710457, "f1": 0.905982905982906, "f1_std": 0.040929061822991615, "bacc": 0.9035326086956521, "bacc_std": 0.04191966272093624} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 35, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.048827725493406936, "f1": 0.8307692307692308, "f1_std": 0.05082034320191659, "bacc": 0.8288043478260869, "bacc_std": 0.05087665539806777} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 36, "C": 0.046415888336127774, "split": "test", "acc": 0.9454545454545454, "acc_std": 0.030335555877768643, "f1": 0.9435897435897436, "f1_std": 0.03179997826027416, "bacc": 0.9408967391304348, "bacc_std": 0.03338938655045195} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 37, "C": 0.046415888336127774, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.05674679151377139, "f1": 0.7758152173913043, "f1_std": 0.05884026222434332, "bacc": 0.7758152173913043, "bacc_std": 0.058889398980197925} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 38, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.03794168644443647, "f1": 0.884453781512605, "f1_std": 0.04160418530946225, "bacc": 0.8756793478260869, "bacc_std": 0.04271098790632444} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 39, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.044938981587433646, "f1": 0.8521505376344086, "f1_std": 0.04550341274726737, "bacc": 0.8566576086956521, "bacc_std": 0.04493712614557608} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 40, "C": 21.54434690031882, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.03711719211380655, "f1": 0.884453781512605, "f1_std": 0.04088340802428907, "bacc": 0.8756793478260869, "bacc_std": 0.0420822810085415} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 41, "C": 0.3593813663804626, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.038172656909588966, "f1": 0.905982905982906, "f1_std": 0.03996491774990712, "bacc": 0.9035326086956521, "bacc_std": 0.04081259399315248} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 42, "C": 0.046415888336127774, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.051705510680301194, "f1": 0.8151881720430108, "f1_std": 0.05234477943645995, "bacc": 0.8192934782608696, "bacc_std": 0.051923672064294144} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 43, "C": 0.046415888336127774, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.038004766991123456, "f1": 0.905982905982906, "f1_std": 0.03962778022043684, "bacc": 0.9035326086956521, "bacc_std": 0.04032195341213158} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 44, "C": 0.3593813663804626, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.049121929593025415, "f1": 0.8521505376344086, "f1_std": 0.04948824250400598, "bacc": 0.8566576086956521, "bacc_std": 0.04835060739258163} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 45, "C": 0.046415888336127774, "split": "test", "acc": 0.9454545454545454, "acc_std": 0.030032489294481417, "f1": 0.9447975911676145, "f1_std": 0.030043243348843153, "bacc": 0.953125, "bacc_std": 0.02580917048744496} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 46, "C": 0.046415888336127774, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.04007429464019412, "f1": 0.9071259709557582, "f1_std": 0.040785709665491825, "bacc": 0.9096467391304348, "bacc_std": 0.04019296831135633} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 47, "C": 0.3593813663804626, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.048155999943709095, "f1": 0.8521505376344086, "f1_std": 0.04863319087527578, "bacc": 0.8566576086956521, "bacc_std": 0.04764090383626747} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 48, "C": 0.046415888336127774, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03762538267563007, "f1": 0.905982905982906, "f1_std": 0.03926288699538724, "bacc": 0.9035326086956521, "bacc_std": 0.04019737365367443} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 49, "C": 0.3593813663804626, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04339027578914255, "f1": 0.8699763593380614, "f1_std": 0.04462501841670839, "bacc": 0.8722826086956521, "bacc_std": 0.044951091782018386} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 50, "C": 2.782559402207126, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.042812387177869074, "f1": 0.8879076086956521, "f1_std": 0.044085342647006996, "bacc": 0.8879076086956521, "bacc_std": 0.044172230104372906} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 51, "C": 21.54434690031882, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04676475327381005, "f1": 0.8281846581048247, "f1_std": 0.05074839817762831, "bacc": 0.8226902173913043, "bacc_std": 0.05102871367373478} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 52, "C": 0.046415888336127774, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.03399599393025281, "f1": 0.9260752688172043, "f1_std": 0.0343098380758264, "bacc": 0.9313858695652174, "bacc_std": 0.032395814857751795} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 53, "C": 0.046415888336127774, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03803912731138256, "f1": 0.905982905982906, "f1_std": 0.039907493917917175, "bacc": 0.9035326086956521, "bacc_std": 0.040854641480386325} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 54, "C": 0.3593813663804626, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04712450889763174, "f1": 0.8521505376344086, "f1_std": 0.04756457614139713, "bacc": 0.8566576086956521, "bacc_std": 0.04664406519520766} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 55, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.046817387460163125, "f1": 0.8533333333333333, "f1_std": 0.046846669448954895, "bacc": 0.8627717391304348, "bacc_std": 0.045339533930937904} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 56, "C": 2.782559402207126, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.05311501320812386, "f1": 0.7727272727272727, "f1_std": 0.056537664830211276, "bacc": 0.7697010869565217, "bacc_std": 0.05632801668031634} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 57, "C": 0.3593813663804626, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03781277933439614, "f1": 0.9071259709557582, "f1_std": 0.03864297852212116, "bacc": 0.9096467391304348, "bacc_std": 0.038297675602074595} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 58, "C": 0.3593813663804626, "split": "test", "acc": 0.7454545454545455, "acc_std": 0.05835787000684258, "f1": 0.741263440860215, "f1_std": 0.05915510410184426, "bacc": 0.7445652173913043, "bacc_std": 0.059159229418867455} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 59, "C": 0.3593813663804626, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04222230032374005, "f1": 0.8879076086956521, "f1_std": 0.04354949684334448, "bacc": 0.8879076086956521, "bacc_std": 0.043741166299477434} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 60, "C": 0.005994842503189409, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.047503715383184604, "f1": 0.8505434782608696, "f1_std": 0.04907652042257629, "bacc": 0.8505434782608696, "bacc_std": 0.04922417086899921} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 61, "C": 0.005994842503189409, "split": "test", "acc": 0.9454545454545454, "acc_std": 0.02974312060192233, "f1": 0.9435897435897436, "f1_std": 0.031127601279959827, "bacc": 0.9408967391304348, "bacc_std": 0.032602747872421796} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 62, "C": 0.3593813663804626, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04201677744397896, "f1": 0.8891129032258065, "f1_std": 0.042627363369665626, "bacc": 0.8940217391304348, "bacc_std": 0.041925285278970534} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 63, "C": 0.046415888336127774, "split": "test", "acc": 0.9454545454545454, "acc_std": 0.03074187926096693, "f1": 0.9435897435897436, "f1_std": 0.03218661757865148, "bacc": 0.9408967391304348, "bacc_std": 0.03381087101101002} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 64, "C": 0.3593813663804626, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04718160448366387, "f1": 0.8521505376344086, "f1_std": 0.0477635635461171, "bacc": 0.8566576086956521, "bacc_std": 0.04726567459749152} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 65, "C": 0.005994842503189409, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.041619512270723365, "f1": 0.884453781512605, "f1_std": 0.04587094461014319, "bacc": 0.8756793478260869, "bacc_std": 0.046985063345289284} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 66, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04374862831379792, "f1": 0.84593837535014, "f1_std": 0.048009776606613215, "bacc": 0.8383152173913043, "bacc_std": 0.04841079648013054} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 67, "C": 0.005994842503189409, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04383860708410316, "f1": 0.8683760683760684, "f1_std": 0.04565633848505554, "bacc": 0.8661684782608696, "bacc_std": 0.04585107816009147} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 68, "C": 2.782559402207126, "split": "test", "acc": 0.9636363636363636, "acc_std": 0.024389240277678075, "f1": 0.9621212121212122, "f1_std": 0.02602883135143526, "bacc": 0.9565217391304348, "bacc_std": 0.029161048158093342} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 69, "C": 0.046415888336127774, "split": "test", "acc": 0.9454545454545454, "acc_std": 0.030658211794121714, "f1": 0.9442755825734549, "f1_std": 0.03125687663401937, "bacc": 0.9470108695652174, "bacc_std": 0.030483606025122685} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 70, "C": 0.3593813663804626, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03797302217658138, "f1": 0.9079959852793577, "f1_std": 0.038089775277604015, "bacc": 0.9157608695652174, "bacc_std": 0.035683234435983056} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 71, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04026324944907387, "f1": 0.8863636363636364, "f1_std": 0.04253399477606413, "bacc": 0.8817934782608696, "bacc_std": 0.043186029009299264} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 72, "C": 0.046415888336127774, "split": "test", "acc": 0.9454545454545454, "acc_std": 0.02962072091354323, "f1": 0.9435897435897436, "f1_std": 0.030902111914584772, "bacc": 0.9408967391304348, "bacc_std": 0.032219871006114204} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 73, "C": 0.046415888336127774, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.03411744421846397, "f1": 0.9229691876750701, "f1_std": 0.037700977820631115, "bacc": 0.9130434782608696, "bacc_std": 0.04079259634816344} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 74, "C": 0.005994842503189409, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03945050670572994, "f1": 0.905982905982906, "f1_std": 0.04122527157904843, "bacc": 0.9035326086956521, "bacc_std": 0.04222734334658748} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 75, "C": 0.3593813663804626, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03646451461964469, "f1": 0.9071259709557582, "f1_std": 0.03727505265281507, "bacc": 0.9096467391304348, "bacc_std": 0.03708450931046663} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 76, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.045906178125418404, "f1": 0.8683760683760684, "f1_std": 0.047644664732567094, "bacc": 0.8661684782608696, "bacc_std": 0.04759468109052786} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 77, "C": 0.3593813663804626, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.049396971834782324, "f1": 0.8250265111346766, "f1_std": 0.055676251898489286, "bacc": 0.8165760869565217, "bacc_std": 0.055072998163466086} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 78, "C": 0.3593813663804626, "split": "test", "acc": 0.9454545454545454, "acc_std": 0.02885071292643627, "f1": 0.9435897435897436, "f1_std": 0.03013725174734259, "bacc": 0.9408967391304348, "bacc_std": 0.03140929611670196} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 79, "C": 2.782559402207126, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.041494398705933085, "f1": 0.8639095086603039, "f1_std": 0.04675570033253851, "bacc": 0.8539402173913043, "bacc_std": 0.04738155065165143} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 80, "C": 0.3593813663804626, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04615648553588966, "f1": 0.84593837535014, "f1_std": 0.050824396026882636, "bacc": 0.8383152173913043, "bacc_std": 0.051159712209321695} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 81, "C": 166.81005372000556, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.03488681107204928, "f1": 0.9252717391304348, "f1_std": 0.03590662223672223, "bacc": 0.9252717391304348, "bacc_std": 0.036149279317895314} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 82, "C": 2.782559402207126, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.031088229549604884, "f1": 0.9229691876750701, "f1_std": 0.03431580579124042, "bacc": 0.9130434782608696, "bacc_std": 0.03717070924409279} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 83, "C": 0.046415888336127774, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03728356383490459, "f1": 0.9027925061859314, "f1_std": 0.04256693149965154, "bacc": 0.8913043478260869, "bacc_std": 0.044578174150429396} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 84, "C": 0.3593813663804626, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.05075169174398062, "f1": 0.8505434782608696, "f1_std": 0.05238526513894739, "bacc": 0.8505434782608696, "bacc_std": 0.05266563538232953} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 85, "C": 0.3593813663804626, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.041465564723650386, "f1": 0.8879076086956521, "f1_std": 0.04258211327389628, "bacc": 0.8879076086956521, "bacc_std": 0.04266989860721131} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 86, "C": 166.81005372000556, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.04126365939633638, "f1": 0.905982905982906, "f1_std": 0.043043129821068185, "bacc": 0.9035326086956521, "bacc_std": 0.04398301655093316} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 87, "C": 166.81005372000556, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.044858698263740934, "f1": 0.8683760683760684, "f1_std": 0.046786230604012075, "bacc": 0.8661684782608696, "bacc_std": 0.04715129024754165} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 88, "C": 0.3593813663804626, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03982772820390711, "f1": 0.9045470322804582, "f1_std": 0.042893741659459604, "bacc": 0.8974184782608696, "bacc_std": 0.04467382717449169} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 89, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.03977874344109006, "f1": 0.8609606356085229, "f1_std": 0.04769197901313258, "bacc": 0.8478260869565217, "bacc_std": 0.04756154107086856} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 90, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04332389640833335, "f1": 0.8663658451926415, "f1_std": 0.04694369727377224, "bacc": 0.8600543478260869, "bacc_std": 0.04763515066407978} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 91, "C": 0.046415888336127774, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03817066503771565, "f1": 0.905982905982906, "f1_std": 0.039868188273589907, "bacc": 0.9035326086956521, "bacc_std": 0.04056101864698278} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 92, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04182448964284319, "f1": 0.8863636363636364, "f1_std": 0.04433174394875053, "bacc": 0.8817934782608696, "bacc_std": 0.04525097274774546} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 93, "C": 0.3593813663804626, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.040443557241098965, "f1": 0.8639095086603039, "f1_std": 0.04576511718702058, "bacc": 0.8539402173913043, "bacc_std": 0.04641230245421454} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 94, "C": 2.782559402207126, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04495726529137797, "f1": 0.8683760683760684, "f1_std": 0.046844482679540854, "bacc": 0.8661684782608696, "bacc_std": 0.047150004663879676} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 95, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.049351979202861426, "f1": 0.8307692307692308, "f1_std": 0.05148845711246185, "bacc": 0.8288043478260869, "bacc_std": 0.051683076424542534} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 96, "C": 0.005994842503189409, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03760209243342585, "f1": 0.9045470322804582, "f1_std": 0.04058931587271754, "bacc": 0.8974184782608696, "bacc_std": 0.04220810073840855} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 97, "C": 0.046415888336127774, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03705909509048514, "f1": 0.905982905982906, "f1_std": 0.03869099332324131, "bacc": 0.9035326086956521, "bacc_std": 0.039355652704487015} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 98, "C": 2.782559402207126, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03698342715727575, "f1": 0.9027925061859314, "f1_std": 0.041888358739349874, "bacc": 0.8913043478260869, "bacc_std": 0.04421931507935143} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 99, "C": 0.005994842503189409, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.043499725466978285, "f1": 0.884453781512605, "f1_std": 0.04815895374305792, "bacc": 0.8756793478260869, "bacc_std": 0.049355247684443784} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 100, "C": 0.3593813663804626, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.043879624971087716, "f1": 0.8711943793911007, "f1_std": 0.04400777865124005, "bacc": 0.8783967391304348, "bacc_std": 0.042285790159173255} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | aabc_sex | train | 100 | 7.9301 | 32.868 | 0.96299 | 0.032515 | 0.96182 | 0.033599 | 0.96056 | 0.034865 | +| flat_mae | patch | logistic | aabc_sex | test | 100 | 7.9301 | 32.868 | 0.88236 | 0.044345 | 0.87824 | 0.045705 | 0.87665 | 0.045811 | + + +done! total time: 0:04:55 diff --git a/data_scaling/n1600_1/eval_v2/abide_dx__patch__logistic/config.yaml b/data_scaling/n1600_1/eval_v2/abide_dx__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..49883a3409f6e86d347dffe40a769257b8a45205 --- /dev/null +++ b/data_scaling/n1600_1/eval_v2/abide_dx__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n1600_1; eval v2 (abide_dx patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n1600_1/eval_v2/abide_dx__patch__logistic +model: flat_mae +representation: patch +dataset: abide_dx +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/abide_dx__patch__logistic +remote_dir: null diff --git a/data_scaling/n1600_1/eval_v2/abide_dx__patch__logistic/eval_table.csv b/data_scaling/n1600_1/eval_v2/abide_dx__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..82a02f1fecf46e35e2f56b0eb938e9016dd12fa0 --- /dev/null +++ b/data_scaling/n1600_1/eval_v2/abide_dx__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,abide_dx,,0.046415888336127774,train,0.8076923076923077,0.015023255323052305,0.8046925716405453,0.01530891510991884,0.803557357672861,0.01531873503509355 +flat_mae,patch,logistic,abide_dx,,0.046415888336127774,test,0.6451612903225806,0.043224590738802145,0.6428384393820372,0.0435918900509514,0.6428384393820372,0.043447962365232795 +flat_mae,patch,logistic,abide_dx,1,2.782559402207126,train,0.9943019943019943,0.0029694264369574713,0.9942414174972314,0.0030009753964670275,0.9942414174972314,0.0030115541185872013 +flat_mae,patch,logistic,abide_dx,1,2.782559402207126,test,0.6451612903225806,0.04315406600902564,0.6418067226890756,0.043853897294788585,0.6418067226890756,0.04374980580424382 +flat_mae,patch,logistic,abide_dx,2,0.3593813663804626,train,0.915954415954416,0.010854473156415665,0.9150356428534796,0.010970132395355128,0.9149132521225545,0.010976524724618261 +flat_mae,patch,logistic,abide_dx,2,0.3593813663804626,test,0.6612903225806451,0.04283227215524843,0.6569169960474308,0.043587852597675246,0.6565126050420168,0.04327851883971466 +flat_mae,patch,logistic,abide_dx,3,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,3,10000.0,test,0.6048387096774194,0.04284558339723388,0.6035753898349319,0.042888159933636594,0.6050420168067226,0.04271828205266412 +flat_mae,patch,logistic,abide_dx,4,0.3593813663804626,train,0.9074074074074074,0.01067380961069783,0.9063951997538335,0.010802502384919824,0.9062753783684017,0.0108869758568305 +flat_mae,patch,logistic,abide_dx,4,0.3593813663804626,test,0.7258064516129032,0.041557026751496036,0.7246603970741902,0.04177510511188718,0.7263655462184874,0.04191788348324183 +flat_mae,patch,logistic,abide_dx,5,0.046415888336127774,train,0.8062678062678063,0.014466616776208172,0.8028822727835818,0.014795653190159375,0.8012550756736803,0.014772032008079044 +flat_mae,patch,logistic,abide_dx,5,0.046415888336127774,test,0.6451612903225806,0.0443420626761152,0.6428384393820372,0.044669201076798955,0.6433823529411764,0.04481200603584851 +flat_mae,patch,logistic,abide_dx,6,0.046415888336127774,train,0.8062678062678063,0.014653536726215194,0.8027307590584501,0.014982977618986773,0.8009597637504614,0.014954862506675123 +flat_mae,patch,logistic,abide_dx,6,0.046415888336127774,test,0.6048387096774194,0.047147189101435234,0.5989703649924097,0.047699611889035626,0.5987394957983193,0.04727771312706628 +flat_mae,patch,logistic,abide_dx,7,0.3593813663804626,train,0.9102564102564102,0.01115162858686779,0.9094314121007956,0.011241365613756722,0.9100406053894425,0.011212807114194855 +flat_mae,patch,logistic,abide_dx,7,0.3593813663804626,test,0.6935483870967742,0.04272298727897597,0.6869519000797236,0.04397862950558822,0.6859243697478992,0.04328323320618852 +flat_mae,patch,logistic,abide_dx,8,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,8,1291.5496650148827,test,0.5967741935483871,0.04701133157618572,0.5941345902068604,0.04750201782040206,0.5945378151260504,0.04751588633058962 +flat_mae,patch,logistic,abide_dx,9,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,9,10000.0,test,0.6612903225806451,0.04209484299117427,0.6580882352941176,0.04259248340526784,0.6580882352941176,0.042533219587874434 +flat_mae,patch,logistic,abide_dx,10,2.782559402207126,train,0.9928774928774928,0.003187670085555591,0.9927952559531508,0.003228302615869019,0.9923588039867111,0.0034337303384864436 +flat_mae,patch,logistic,abide_dx,10,2.782559402207126,test,0.6854838709677419,0.042173629016143015,0.6829891838741396,0.042365119017347845,0.6832983193277311,0.04213584580354409 +flat_mae,patch,logistic,abide_dx,11,2.782559402207126,train,0.9943019943019943,0.0028465445234351007,0.9942414174972314,0.0028768519003028632,0.9942414174972314,0.002890128848900744 +flat_mae,patch,logistic,abide_dx,11,2.782559402207126,test,0.5645161290322581,0.0419770265696058,0.5588932806324111,0.04231845519430366,0.5588235294117647,0.04202422171966164 +flat_mae,patch,logistic,abide_dx,12,0.3593813663804626,train,0.9301994301994302,0.009289120815307752,0.9293041188910789,0.009413771166333905,0.9284237726098191,0.009451296298755905 +flat_mae,patch,logistic,abide_dx,12,0.3593813663804626,test,0.5725806451612904,0.04517915545258621,0.5703170970905524,0.04534129572610806,0.5709033613445378,0.04531358437311203 +flat_mae,patch,logistic,abide_dx,13,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,13,166.81005372000556,test,0.5887096774193549,0.042504208030642976,0.5886829268292683,0.04256958207030331,0.5934873949579832,0.04285493145967109 +flat_mae,patch,logistic,abide_dx,14,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,14,21.54434690031882,test,0.6774193548387096,0.041448823177117476,0.6732542819499341,0.04249841731098369,0.6727941176470589,0.04234986706186723 +flat_mae,patch,logistic,abide_dx,15,0.046415888336127774,train,0.8176638176638177,0.01455262065847524,0.8147528140848554,0.014844518985478195,0.8133628645256552,0.014861021056048597 +flat_mae,patch,logistic,abide_dx,15,0.046415888336127774,test,0.6129032258064516,0.04128413512107416,0.6063492063492064,0.04215004040000179,0.60609243697479,0.04168581841441776 +flat_mae,patch,logistic,abide_dx,16,0.3593813663804626,train,0.9173789173789174,0.010718498511414825,0.9165491572112546,0.01083046249957668,0.9167958656330749,0.010886878518908787 +flat_mae,patch,logistic,abide_dx,16,0.3593813663804626,test,0.6612903225806451,0.04215580769939964,0.6569169960474308,0.042888327361309195,0.6565126050420168,0.04270704596111274 +flat_mae,patch,logistic,abide_dx,17,0.3593813663804626,train,0.9216524216524217,0.0100292876366316,0.9208881184649713,0.010128220099199935,0.9212624584717608,0.01017554054669474 +flat_mae,patch,logistic,abide_dx,17,0.3593813663804626,test,0.6854838709677419,0.03763943553216522,0.6761968530297957,0.039557096088534305,0.6754201680672269,0.03872134417024648 +flat_mae,patch,logistic,abide_dx,18,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,18,166.81005372000556,test,0.6612903225806451,0.04127916620954516,0.6522435897435898,0.0432351627213289,0.6517857142857143,0.04216105400839574 +flat_mae,patch,logistic,abide_dx,19,0.046415888336127774,train,0.8005698005698005,0.015254348513730098,0.7972370766488414,0.015577746819761484,0.7957918050941306,0.01558774066038369 +flat_mae,patch,logistic,abide_dx,19,0.046415888336127774,test,0.6693548387096774,0.04139282576696095,0.6553454003118433,0.04473095336025959,0.6559873949579832,0.042654001070094476 +flat_mae,patch,logistic,abide_dx,20,0.046415888336127774,train,0.8247863247863247,0.01402143633664388,0.8221792952331135,0.014337138366353938,0.8210040605389443,0.014465845625968213 +flat_mae,patch,logistic,abide_dx,20,0.046415888336127774,test,0.6209677419354839,0.04198490883924048,0.6118548118548119,0.04341000539334493,0.6118697478991597,0.04262800165133694 +flat_mae,patch,logistic,abide_dx,21,0.046415888336127774,train,0.8276353276353277,0.014068343470391572,0.8253095674385029,0.01428366686455881,0.8244739756367663,0.014337730103275317 +flat_mae,patch,logistic,abide_dx,21,0.046415888336127774,test,0.6209677419354839,0.04482304205266927,0.6179613241560145,0.045105198254835395,0.618172268907563,0.044950100375050754 +flat_mae,patch,logistic,abide_dx,22,0.3593813663804626,train,0.9102564102564102,0.011051510835814505,0.9089840815780538,0.011279565148368063,0.9076781100036914,0.011502004579277457 +flat_mae,patch,logistic,abide_dx,22,0.3593813663804626,test,0.6854838709677419,0.040528413357569265,0.6808131476470201,0.04155361725552617,0.6801470588235294,0.041286066366324106 +flat_mae,patch,logistic,abide_dx,23,0.3593813663804626,train,0.915954415954416,0.011147073076452319,0.9150858101167985,0.01126609270683412,0.9152085640457733,0.011325981027934064 +flat_mae,patch,logistic,abide_dx,23,0.3593813663804626,test,0.6451612903225806,0.04473473672813193,0.6405797101449275,0.04550981119611706,0.6402310924369747,0.045267181402848494 +flat_mae,patch,logistic,abide_dx,24,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,24,1291.5496650148827,test,0.6532258064516129,0.04364654409749443,0.6521171788347361,0.04366482493733975,0.6538865546218487,0.04366658880092021 +flat_mae,patch,logistic,abide_dx,25,0.3593813663804626,train,0.9202279202279202,0.010275614736461753,0.9194267724798321,0.010370096578298078,0.9196751568844592,0.010369107710611104 +flat_mae,patch,logistic,abide_dx,25,0.3593813663804626,test,0.6532258064516129,0.04230751539006691,0.6530227110040997,0.042368543502131546,0.6570378151260504,0.042451516665635965 +flat_mae,patch,logistic,abide_dx,26,2.782559402207126,train,0.9928774928774928,0.0032873888222210244,0.9927996307502949,0.003324652830105136,0.9926541159099298,0.0034030730465437345 +flat_mae,patch,logistic,abide_dx,26,2.782559402207126,test,0.6048387096774194,0.04605660906203847,0.6035753898349319,0.046211346877024106,0.6050420168067226,0.04645532524166727 +flat_mae,patch,logistic,abide_dx,27,0.3593813663804626,train,0.9173789173789174,0.010250984240566452,0.9162918068107992,0.010415400844174067,0.9153193060169804,0.010538695625765392 +flat_mae,patch,logistic,abide_dx,27,0.3593813663804626,test,0.5967741935483871,0.04488951635359795,0.5929621848739496,0.045110436897883986,0.5929621848739496,0.045099040217754106 +flat_mae,patch,logistic,abide_dx,28,2.782559402207126,train,0.9914529914529915,0.003496239603651545,0.9913621262458472,0.0035332257440301856,0.9913621262458472,0.0035363322373621357 +flat_mae,patch,logistic,abide_dx,28,2.782559402207126,test,0.6370967741935484,0.04399447747009622,0.6351748937561295,0.04423286606922299,0.6360294117647058,0.044260688416216436 +flat_mae,patch,logistic,abide_dx,29,2.782559402207126,train,0.9914529914529915,0.003356839949269641,0.9913569505548625,0.003396868568225819,0.9910668143226282,0.0035273379435853437 +flat_mae,patch,logistic,abide_dx,29,2.782559402207126,test,0.6693548387096774,0.04133937044013971,0.6688163637548042,0.041428873793280654,0.6717436974789917,0.041605944231173975 +flat_mae,patch,logistic,abide_dx,30,0.3593813663804626,train,0.9116809116809117,0.01063221602018403,0.9102708943223519,0.010860964338577563,0.9083794758213363,0.011020303485081782 +flat_mae,patch,logistic,abide_dx,30,0.3593813663804626,test,0.6532258064516129,0.041810418345264555,0.6480760345851759,0.04282099136622034,0.6475840336134454,0.04241237638858726 +flat_mae,patch,logistic,abide_dx,31,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,31,10000.0,test,0.5483870967741935,0.044143422832812404,0.5386659580122243,0.04490701653975853,0.539390756302521,0.04425953469133911 +flat_mae,patch,logistic,abide_dx,32,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,32,2.782559402207126,test,0.6290322580645161,0.043603310645770825,0.6191239316239316,0.044692035161611784,0.6192226890756303,0.043857318601906746 +flat_mae,patch,logistic,abide_dx,33,0.005994842503189409,train,0.7393162393162394,0.01563757843475421,0.7316037213539109,0.016218017645940833,0.7296050203026947,0.015931203042934848 +flat_mae,patch,logistic,abide_dx,33,0.005994842503189409,test,0.6370967741935484,0.04108751541523139,0.6217205613178767,0.04417873017802807,0.6234243697478992,0.042146266877715666 +flat_mae,patch,logistic,abide_dx,34,0.3593813663804626,train,0.9173789173789174,0.010532135454171799,0.9165491572112546,0.01065182537391755,0.9167958656330749,0.010753258069014967 +flat_mae,patch,logistic,abide_dx,34,0.3593813663804626,test,0.5645161290322581,0.04349792775572952,0.5588932806324111,0.044009632162743506,0.5588235294117647,0.04379720389710001 +flat_mae,patch,logistic,abide_dx,35,0.3593813663804626,train,0.9116809116809117,0.010755870984870384,0.9106884890669118,0.010865805171609722,0.9104466592838686,0.01084875086034066 +flat_mae,patch,logistic,abide_dx,35,0.3593813663804626,test,0.6612903225806451,0.04317668128112225,0.6569169960474308,0.04388080405654146,0.6565126050420168,0.04358627245822747 +flat_mae,patch,logistic,abide_dx,36,0.3593813663804626,train,0.915954415954416,0.010562317944884457,0.9149840202471782,0.010710437475330853,0.9146179401993355,0.010859409012890406 +flat_mae,patch,logistic,abide_dx,36,0.3593813663804626,test,0.6290322580645161,0.04330260390187433,0.6227513227513227,0.04445636638127347,0.6223739495798319,0.04404975031221714 +flat_mae,patch,logistic,abide_dx,37,0.046415888336127774,train,0.8176638176638177,0.01376622610806132,0.8147528140848554,0.01408813402859566,0.8133628645256552,0.014180642802679593 +flat_mae,patch,logistic,abide_dx,37,0.046415888336127774,test,0.6290322580645161,0.04255647835707375,0.6242424242424243,0.04344865824041677,0.6239495798319328,0.04317765453077027 +flat_mae,patch,logistic,abide_dx,38,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,38,21.54434690031882,test,0.6935483870967742,0.04208442533003155,0.6869519000797236,0.0433402465549648,0.6859243697478992,0.042839182817647134 +flat_mae,patch,logistic,abide_dx,39,0.046415888336127774,train,0.8176638176638177,0.014310673749141148,0.8146167557932263,0.014617418898621374,0.8130675526024363,0.014638327583470214 +flat_mae,patch,logistic,abide_dx,39,0.046415888336127774,test,0.5967741935483871,0.04213123975345814,0.5860042735042735,0.04339517591280546,0.5866596638655462,0.04251203404481154 +flat_mae,patch,logistic,abide_dx,40,0.046415888336127774,train,0.8290598290598291,0.013963972662825758,0.8265767434966278,0.01425098857927296,0.8254706533776301,0.014336668676694865 +flat_mae,patch,logistic,abide_dx,40,0.046415888336127774,test,0.6209677419354839,0.042842863187684196,0.6049081418208935,0.04630340262026788,0.6071428571428572,0.044003501244538744 +flat_mae,patch,logistic,abide_dx,41,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,41,21.54434690031882,test,0.5806451612903226,0.04056280378608651,0.5766806722689075,0.04119806875233372,0.5766806722689075,0.04100330391895227 +flat_mae,patch,logistic,abide_dx,42,2.782559402207126,train,0.9957264957264957,0.0024065744778682127,0.995679778450177,0.0024341761819457568,0.9955334071613142,0.00254414428936637 +flat_mae,patch,logistic,abide_dx,42,2.782559402207126,test,0.5806451612903226,0.04538348565316704,0.5788923719958203,0.04549809376664079,0.5798319327731092,0.045594096939506494 +flat_mae,patch,logistic,abide_dx,43,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,43,166.81005372000556,test,0.6209677419354839,0.041061801471226685,0.6167554415729598,0.04173116621886512,0.6165966386554622,0.04165952078991606 +flat_mae,patch,logistic,abide_dx,44,0.046415888336127774,train,0.8361823361823362,0.014936594318071556,0.8332558787991449,0.015290752788390979,0.8313399778516057,0.01532361023096462 +flat_mae,patch,logistic,abide_dx,44,0.046415888336127774,test,0.5725806451612904,0.044410378716839984,0.5623043623043623,0.045657614767207845,0.5630252100840336,0.0447917483011613 +flat_mae,patch,logistic,abide_dx,45,0.046415888336127774,train,0.8247863247863247,0.014676547408116674,0.8223022545095335,0.014952446880696175,0.8212993724621631,0.015023449062865263 +flat_mae,patch,logistic,abide_dx,45,0.046415888336127774,test,0.6209677419354839,0.040768821103798765,0.6118548118548119,0.042217892175366895,0.6118697478991597,0.04132544718510285 +flat_mae,patch,logistic,abide_dx,46,0.000774263682681127,train,0.6809116809116809,0.016588831325984793,0.662962962962963,0.018315778693097137,0.6642303433001107,0.017238286457480993 +flat_mae,patch,logistic,abide_dx,46,0.000774263682681127,test,0.5403225806451613,0.04253791538824539,0.5174438451560046,0.044847394023006776,0.5241596638655462,0.04273646297939909 +flat_mae,patch,logistic,abide_dx,47,0.3593813663804626,train,0.9230769230769231,0.01016297420817992,0.9220647856514338,0.01033223711462136,0.921077888519749,0.010496166965081844 +flat_mae,patch,logistic,abide_dx,47,0.3593813663804626,test,0.6370967741935484,0.04199716307747072,0.6317074780542539,0.0427370529042198,0.6313025210084033,0.042378926557957046 +flat_mae,patch,logistic,abide_dx,48,0.046415888336127774,train,0.8133903133903134,0.014799831070519402,0.8108723416069743,0.01499904967334967,0.810077519379845,0.014973675060664748 +flat_mae,patch,logistic,abide_dx,48,0.046415888336127774,test,0.6370967741935484,0.04151339690736908,0.6301451580831179,0.04271308281199993,0.6297268907563025,0.042062292330763015 +flat_mae,patch,logistic,abide_dx,49,0.005994842503189409,train,0.7279202279202279,0.016471272635025874,0.7206828668742904,0.017062987842959516,0.7189737910668144,0.016816584963320856 +flat_mae,patch,logistic,abide_dx,49,0.005994842503189409,test,0.6209677419354839,0.0436074002779464,0.6137071651090342,0.044923908723406784,0.6134453781512605,0.04434425857556162 +flat_mae,patch,logistic,abide_dx,50,0.046415888336127774,train,0.8076923076923077,0.014549062853285371,0.8048309337924415,0.014801601302943508,0.8037283130306385,0.014809465245908328 +flat_mae,patch,logistic,abide_dx,50,0.046415888336127774,test,0.5967741935483871,0.04724037707095966,0.5880946053680574,0.048620839071026385,0.5882352941176471,0.04785585443813488 +flat_mae,patch,logistic,abide_dx,51,0.3593813663804626,train,0.9259259259259259,0.010048177386149819,0.9250935714754744,0.01016549139611974,0.9248431155407899,0.010233763143644296 +flat_mae,patch,logistic,abide_dx,51,0.3593813663804626,test,0.5645161290322581,0.045006693917666776,0.5616653574234092,0.045693244711812256,0.5619747899159664,0.04573902997715608 +flat_mae,patch,logistic,abide_dx,52,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,52,21.54434690031882,test,0.5887096774193549,0.04131049154583597,0.5808311791608669,0.04237888167746313,0.5808823529411764,0.04182519286053294 +flat_mae,patch,logistic,abide_dx,53,0.046415888336127774,train,0.8304843304843305,0.014085791343758015,0.8270445525183073,0.01452455009942656,0.8246954595791806,0.01457409472900942 +flat_mae,patch,logistic,abide_dx,53,0.046415888336127774,test,0.6451612903225806,0.0442763058430251,0.6418067226890756,0.044582028371541904,0.6418067226890756,0.04444034586917256 +flat_mae,patch,logistic,abide_dx,54,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,54,21.54434690031882,test,0.6451612903225806,0.042655420658463415,0.6405797101449275,0.04336799328094336,0.6402310924369747,0.04317404935859212 +flat_mae,patch,logistic,abide_dx,55,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,55,166.81005372000556,test,0.5806451612903226,0.04710709122525476,0.5796610169491525,0.04720669215907874,0.58140756302521,0.04716689954868565 +flat_mae,patch,logistic,abide_dx,56,0.005994842503189409,train,0.7250712250712251,0.016139045662496448,0.7172161172161171,0.01687146052434255,0.7155038759689922,0.0165927462611848 +flat_mae,patch,logistic,abide_dx,56,0.005994842503189409,test,0.717741935483871,0.04074938953086779,0.710955710955711,0.04234767929232972,0.7095588235294117,0.04165388557120636 +flat_mae,patch,logistic,abide_dx,57,0.005994842503189409,train,0.7279202279202279,0.01590782370538978,0.7206828668742904,0.016514100488753115,0.7189737910668144,0.01628190002659512 +flat_mae,patch,logistic,abide_dx,57,0.005994842503189409,test,0.6290322580645161,0.04221403764135138,0.6145945945945945,0.044986439868740555,0.6160714285714286,0.0431033548379039 +flat_mae,patch,logistic,abide_dx,58,0.3593813663804626,train,0.9273504273504274,0.009499456905323157,0.9265995985755378,0.009602448812581503,0.9267257290513105,0.00966939099959458 +flat_mae,patch,logistic,abide_dx,58,0.3593813663804626,test,0.5887096774193549,0.04345151711079797,0.5854473942969518,0.043649732173370655,0.585609243697479,0.043616131507200576 +flat_mae,patch,logistic,abide_dx,59,0.046415888336127774,train,0.8176638176638177,0.014647919225946625,0.8135721221275218,0.01508924023095054,0.811000369139904,0.015042841834826941 +flat_mae,patch,logistic,abide_dx,59,0.046415888336127774,test,0.6612903225806451,0.043948354421975616,0.6590730557737627,0.04434662818021113,0.6596638655462186,0.044411496087689274 +flat_mae,patch,logistic,abide_dx,60,0.3593813663804626,train,0.9330484330484331,0.009771494587646683,0.932316529052772,0.00988118015103284,0.9321889996308601,0.009927091730733757 +flat_mae,patch,logistic,abide_dx,60,0.3593813663804626,test,0.5645161290322581,0.04285740944369791,0.5571428571428572,0.044257011616676105,0.5572478991596639,0.04359402238435234 +flat_mae,patch,logistic,abide_dx,61,0.046415888336127774,train,0.7991452991452992,0.015417679727343335,0.7958641569203293,0.015775240872309863,0.794499815430048,0.01581606798080206 +flat_mae,patch,logistic,abide_dx,61,0.046415888336127774,test,0.6209677419354839,0.04275506797642934,0.6153389215233318,0.043364271345563546,0.6150210084033614,0.04303148828429284 +flat_mae,patch,logistic,abide_dx,62,0.3593813663804626,train,0.9173789173789174,0.01066821280829246,0.916450522030337,0.010790601634214402,0.9162052417866371,0.01083830168511745 +flat_mae,patch,logistic,abide_dx,62,0.3593813663804626,test,0.6129032258064516,0.044591380995715954,0.6092436974789917,0.04496618195515294,0.6092436974789917,0.04496221435903084 +flat_mae,patch,logistic,abide_dx,63,0.046415888336127774,train,0.8176638176638177,0.014797777843489991,0.8143348320550119,0.015150912751626607,0.8124769287559985,0.015140893852872147 +flat_mae,patch,logistic,abide_dx,63,0.046415888336127774,test,0.6532258064516129,0.04251364474605974,0.6513893429225237,0.042828933961936426,0.6523109243697479,0.04286573426562781 +flat_mae,patch,logistic,abide_dx,64,0.3593813663804626,train,0.9116809116809117,0.011728944049037427,0.9107419712070874,0.011864664717950552,0.9107419712070874,0.0119647839716125 +flat_mae,patch,logistic,abide_dx,64,0.3593813663804626,test,0.6693548387096774,0.04162876354414529,0.6667322189446083,0.042027284733257224,0.6670168067226891,0.04201562204212226 +flat_mae,patch,logistic,abide_dx,65,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,65,10000.0,test,0.6048387096774194,0.04322006160669094,0.5953379953379954,0.04410923997215498,0.5955882352941176,0.04338512030311183 +flat_mae,patch,logistic,abide_dx,66,0.046415888336127774,train,0.8219373219373219,0.013985744249032043,0.8191597885560604,0.014284699284012301,0.8178294573643411,0.014347443462266908 +flat_mae,patch,logistic,abide_dx,66,0.046415888336127774,test,0.5645161290322581,0.04442157735672521,0.5616653574234092,0.04464031303680255,0.5619747899159664,0.04462763413214361 +flat_mae,patch,logistic,abide_dx,67,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,67,21.54434690031882,test,0.6774193548387096,0.0440312861762055,0.6760710553814002,0.044205117682407334,0.6775210084033614,0.044376538753704724 +flat_mae,patch,logistic,abide_dx,68,0.046415888336127774,train,0.8148148148148148,0.014615182184832936,0.8114338138058714,0.014969884121617379,0.8095976375046142,0.014982231173465043 +flat_mae,patch,logistic,abide_dx,68,0.046415888336127774,test,0.6774193548387096,0.03988968920524124,0.6766623207301173,0.03985090955607941,0.6790966386554622,0.0396630403764 +flat_mae,patch,logistic,abide_dx,69,0.005994842503189409,train,0.7222222222222222,0.015845758146373127,0.71220011647391,0.016694388530313478,0.7105574012550757,0.016255941375285705 +flat_mae,patch,logistic,abide_dx,69,0.005994842503189409,test,0.6935483870967742,0.03918475608270628,0.6744957170489085,0.04368269694370768,0.6764705882352942,0.040576937504884024 +flat_mae,patch,logistic,abide_dx,70,0.046415888336127774,train,0.8219373219373219,0.014890421525564135,0.8194128602739162,0.015159320973435295,0.8184200812107789,0.015210309858869862 +flat_mae,patch,logistic,abide_dx,70,0.046415888336127774,test,0.6532258064516129,0.043387429546631266,0.6448884448884449,0.04505532105085936,0.6444327731092437,0.044272195026281394 +flat_mae,patch,logistic,abide_dx,71,0.3593813663804626,train,0.9173789173789174,0.009864659919561819,0.916450522030337,0.009983634559924508,0.9162052417866371,0.010057325200153452 +flat_mae,patch,logistic,abide_dx,71,0.3593813663804626,test,0.5967741935483871,0.04184497366494297,0.5836690840719849,0.04386788504814251,0.5850840336134454,0.042524133223668825 +flat_mae,patch,logistic,abide_dx,72,0.046415888336127774,train,0.8190883190883191,0.01432870458439146,0.816523466038299,0.014585119642559291,0.8155407899593946,0.014622458792060318 +flat_mae,patch,logistic,abide_dx,72,0.046415888336127774,test,0.5725806451612904,0.04588841769174245,0.5643931861867832,0.04710866956910183,0.5646008403361344,0.04645143615475117 +flat_mae,patch,logistic,abide_dx,73,0.046415888336127774,train,0.8247863247863247,0.013952505630648144,0.8217917415685116,0.014284254354180789,0.8201181247692876,0.014337343780931565 +flat_mae,patch,logistic,abide_dx,73,0.046415888336127774,test,0.5645161290322581,0.04054969587540614,0.5503626107977437,0.042893102688721346,0.5525210084033614,0.04130253269606253 +flat_mae,patch,logistic,abide_dx,74,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,74,166.81005372000556,test,0.6532258064516129,0.04411038501982605,0.6532032520325204,0.044142369111108797,0.6586134453781513,0.044005495943674026 +flat_mae,patch,logistic,abide_dx,75,2.782559402207126,train,0.9914529914529915,0.0036460769992912477,0.9913671541942677,0.0036803754229016166,0.9916574381690662,0.0035835916468042297 +flat_mae,patch,logistic,abide_dx,75,2.782559402207126,test,0.6612903225806451,0.0405679341855003,0.6569169960474308,0.04136011726842881,0.6565126050420168,0.04115062979413037 +flat_mae,patch,logistic,abide_dx,76,0.3593813663804626,train,0.9216524216524217,0.010254354019900602,0.9207478154846576,0.010399181444338068,0.9203765227021041,0.010548623485043113 +flat_mae,patch,logistic,abide_dx,76,0.3593813663804626,test,0.5806451612903226,0.04510328936739077,0.5752305665349143,0.04580812510554027,0.5751050420168067,0.04547954213225819 +flat_mae,patch,logistic,abide_dx,77,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,77,166.81005372000556,test,0.5645161290322581,0.04299082737391135,0.5603991596638656,0.04326951671349658,0.5603991596638656,0.04316104476965729 +flat_mae,patch,logistic,abide_dx,78,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,78,1291.5496650148827,test,0.6451612903225806,0.04107344757545451,0.6428384393820372,0.041249545666987304,0.6433823529411764,0.041261189041600414 +flat_mae,patch,logistic,abide_dx,79,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,79,21.54434690031882,test,0.5564516129032258,0.045159939495926636,0.5529334644378892,0.04539355083569875,0.553046218487395,0.04537683092150109 +flat_mae,patch,logistic,abide_dx,80,0.3593813663804626,train,0.9173789173789174,0.010606663895061468,0.9162360105332455,0.010776090434967944,0.9150239940937616,0.010864180324572452 +flat_mae,patch,logistic,abide_dx,80,0.3593813663804626,test,0.6532258064516129,0.04332559497595863,0.6532032520325204,0.043432073821704775,0.6586134453781513,0.04308902404950235 +flat_mae,patch,logistic,abide_dx,81,2.782559402207126,train,0.9985754985754985,0.0014495126221691055,0.998559926150059,0.0014662494649711024,0.9984126984126984,0.001615171207559865 +flat_mae,patch,logistic,abide_dx,81,2.782559402207126,test,0.6370967741935484,0.04079317647129364,0.6330637206549615,0.041215956274162385,0.6328781512605042,0.04095087611731206 +flat_mae,patch,logistic,abide_dx,82,0.005994842503189409,train,0.7193732193732194,0.016843533022214092,0.7113553113553113,0.017544721718220514,0.7097452934662237,0.017239734808306564 +flat_mae,patch,logistic,abide_dx,82,0.005994842503189409,test,0.6290322580645161,0.04341803669570844,0.6169755573462261,0.045335082058146015,0.6176470588235294,0.044038023385562675 +flat_mae,patch,logistic,abide_dx,83,0.046415888336127774,train,0.8290598290598291,0.013967919208779633,0.8259389050515737,0.014354406423678277,0.8239940937615357,0.014413377503695183 +flat_mae,patch,logistic,abide_dx,83,0.046415888336127774,test,0.6451612903225806,0.0443571143111272,0.6428384393820372,0.04490256131615662,0.6433823529411764,0.045040070878474565 +flat_mae,patch,logistic,abide_dx,84,0.3593813663804626,train,0.9230769230769231,0.010175500346353594,0.9223043877484095,0.01027096578448886,0.9225544481358434,0.010276079357824633 +flat_mae,patch,logistic,abide_dx,84,0.3593813663804626,test,0.5645161290322581,0.04402989477292537,0.5644028103044496,0.044115388326059705,0.5682773109243697,0.04407313786201485 +flat_mae,patch,logistic,abide_dx,85,0.3593813663804626,train,0.9145299145299145,0.010757520880060578,0.9134615384615385,0.010918234000148762,0.9127353266888151,0.011027834046331923 +flat_mae,patch,logistic,abide_dx,85,0.3593813663804626,test,0.6612903225806451,0.0425775261565397,0.6569169960474308,0.04339476337921228,0.6565126050420168,0.04307600730333967 +flat_mae,patch,logistic,abide_dx,86,0.046415888336127774,train,0.8219373219373219,0.013708234113210102,0.8191597885560604,0.014054229945178805,0.8178294573643411,0.014190020185803273 +flat_mae,patch,logistic,abide_dx,86,0.046415888336127774,test,0.6290322580645161,0.04436086762545148,0.6227513227513227,0.045774167867267915,0.6223739495798319,0.045247274014398466 +flat_mae,patch,logistic,abide_dx,87,0.3593813663804626,train,0.9216524216524217,0.010211320243314132,0.9208427043461682,0.010303639849570863,0.9209671465485418,0.01025893141417375 +flat_mae,patch,logistic,abide_dx,87,0.3593813663804626,test,0.6370967741935484,0.04302938944348725,0.6368842324461508,0.043106527847281376,0.6407563025210083,0.04305366586812725 +flat_mae,patch,logistic,abide_dx,88,2.782559402207126,train,0.9914529914529915,0.0036507100718269512,0.9913621262458472,0.0036890462803701274,0.9913621262458472,0.003685154597311126 +flat_mae,patch,logistic,abide_dx,88,2.782559402207126,test,0.5967741935483871,0.04332706903946386,0.5915678524374176,0.04404242351930042,0.5913865546218487,0.0437405872101056 +flat_mae,patch,logistic,abide_dx,89,0.000774263682681127,train,0.6894586894586895,0.01598667348479169,0.6719907407407408,0.017559924368347836,0.6728682170542635,0.016511308071592224 +flat_mae,patch,logistic,abide_dx,89,0.000774263682681127,test,0.5887096774193549,0.042055980729124004,0.5740553647201454,0.04408184847415813,0.576155462184874,0.04251462397472721 +flat_mae,patch,logistic,abide_dx,90,2.782559402207126,train,0.9943019943019943,0.0027658447755183266,0.9942447694628451,0.0027922272102590545,0.9945367294204503,0.002681760455073407 +flat_mae,patch,logistic,abide_dx,90,2.782559402207126,test,0.6209677419354839,0.04405455833738171,0.6118548118548119,0.04540958284168729,0.6118697478991597,0.044574256684535817 +flat_mae,patch,logistic,abide_dx,91,2.782559402207126,train,0.9928774928774928,0.0030490666860175488,0.9928038822132881,0.00307850347826223,0.9929494278331488,0.0030004917092882636 +flat_mae,patch,logistic,abide_dx,91,2.782559402207126,test,0.5967741935483871,0.042496161912757444,0.58994708994709,0.043359028820421676,0.5898109243697479,0.04300432856990718 +flat_mae,patch,logistic,abide_dx,92,2.782559402207126,train,0.9943019943019943,0.0028516953089373306,0.9942414174972314,0.002882369908441518,0.9942414174972314,0.0028999533678778664 +flat_mae,patch,logistic,abide_dx,92,2.782559402207126,test,0.6048387096774194,0.043205146723902264,0.6004471624909581,0.04339853860387639,0.6003151260504203,0.04313197555109993 +flat_mae,patch,logistic,abide_dx,93,0.005994842503189409,train,0.7293447293447294,0.016044536881460743,0.7217470397289651,0.016774203226238387,0.7199704688076781,0.01651369954497792 +flat_mae,patch,logistic,abide_dx,93,0.005994842503189409,test,0.5564516129032258,0.04514774854576142,0.5376584638329605,0.04768133916298761,0.542016806722689,0.04567978406006964 +flat_mae,patch,logistic,abide_dx,94,0.046415888336127774,train,0.8091168091168092,0.014761700480400057,0.8062083855231064,0.01506455742286792,0.8050203026947212,0.015112581805423003 +flat_mae,patch,logistic,abide_dx,94,0.046415888336127774,test,0.6209677419354839,0.044076047586684576,0.6189604445897352,0.04447211580082471,0.6197478991596639,0.044591352531199974 +flat_mae,patch,logistic,abide_dx,95,0.046415888336127774,train,0.8048433048433048,0.015159364790703565,0.8019395402189964,0.015402753830457784,0.8008490217792543,0.015386588070777842 +flat_mae,patch,logistic,abide_dx,95,0.046415888336127774,test,0.6532258064516129,0.04333056636220568,0.6448884448884449,0.04522132117033592,0.6444327731092437,0.04435618201376702 +flat_mae,patch,logistic,abide_dx,96,0.3593813663804626,train,0.9188034188034188,0.010248285401669752,0.9178146341463416,0.010380866635679973,0.917201919527501,0.010450279742871233 +flat_mae,patch,logistic,abide_dx,96,0.3593813663804626,test,0.6370967741935484,0.04540522828334925,0.6351748937561295,0.04555807068059736,0.6360294117647058,0.04550355303189617 +flat_mae,patch,logistic,abide_dx,97,0.046415888336127774,train,0.8034188034188035,0.014917400682090687,0.8002803776852347,0.015286822746622419,0.7989664082687338,0.015362637083836493 +flat_mae,patch,logistic,abide_dx,97,0.046415888336127774,test,0.6774193548387096,0.041961356782870045,0.6743697478991597,0.04252339113435198,0.6743697478991597,0.04252518320339834 +flat_mae,patch,logistic,abide_dx,98,0.046415888336127774,train,0.8219373219373219,0.014567928319261245,0.8191597885560604,0.01487050773618455,0.8178294573643411,0.014884495165479874 +flat_mae,patch,logistic,abide_dx,98,0.046415888336127774,test,0.5725806451612904,0.04488942073197699,0.5643931861867832,0.04610272171366664,0.5646008403361344,0.04552168369616218 +flat_mae,patch,logistic,abide_dx,99,0.046415888336127774,train,0.8148148148148148,0.014579107552458778,0.8117201426024956,0.014877917661238922,0.810188261351052,0.014890417659683004 +flat_mae,patch,logistic,abide_dx,99,0.046415888336127774,test,0.6532258064516129,0.04165320316646794,0.6513893429225237,0.041771605536080954,0.6523109243697479,0.04174381092633018 +flat_mae,patch,logistic,abide_dx,100,0.046415888336127774,train,0.8034188034188035,0.014527964507993317,0.8001336898395722,0.014917791108645155,0.7986710963455149,0.014998033427616098 +flat_mae,patch,logistic,abide_dx,100,0.046415888336127774,test,0.6290322580645161,0.04270227922962411,0.6191239316239316,0.04398560197563504,0.6192226890756303,0.0430599411110248 diff --git a/data_scaling/n1600_1/eval_v2/abide_dx__patch__logistic/log.txt b/data_scaling/n1600_1/eval_v2/abide_dx__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..2ce93f92a860da297f256df613111fe0564d041c --- /dev/null +++ b/data_scaling/n1600_1/eval_v2/abide_dx__patch__logistic/log.txt @@ -0,0 +1,252 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: 58906bf7243fb545e1349221e6921a1797e2e666, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-26 17:20:26 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n1600_1; eval v2 (abide_dx patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n1600_1/eval_v2/abide_dx__patch__logistic +model: flat_mae +representation: patch +dataset: abide_dx +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/abide_dx__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: abide_dx (flat) +train (n=578): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'dataset', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 578 +}), + labels=['Autism' 'Control'], + counts=[260 318] +) + +validation (n=124): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'dataset', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 124 +}), + labels=['Autism' 'Control'], + counts=[54 70] +) + +test (n=124): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'dataset', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 124 +}), + labels=['Autism' 'Control'], + counts=[57 67] +) + +extracting features for all splits +extract (train) [ 0/289] eta: 0:18:12 time: 3.7787 data: 2.9693 max mem: 2698 +extract (train) [ 20/289] eta: 0:01:29 time: 0.1613 data: 0.0473 max mem: 2851 +extract (train) [ 40/289] eta: 0:00:59 time: 0.1364 data: 0.0340 max mem: 2851 +extract (train) [ 60/289] eta: 0:00:47 time: 0.1525 data: 0.0417 max mem: 2851 +extract (train) [ 80/289] eta: 0:00:40 time: 0.1450 data: 0.0375 max mem: 2851 +extract (train) [100/289] eta: 0:00:34 time: 0.1469 data: 0.0391 max mem: 2851 +extract (train) [120/289] eta: 0:00:30 time: 0.1501 data: 0.0415 max mem: 2851 +extract (train) [140/289] eta: 0:00:25 time: 0.1395 data: 0.0353 max mem: 2851 +extract (train) [160/289] eta: 0:00:21 time: 0.1447 data: 0.0394 max mem: 2851 +extract (train) [180/289] eta: 0:00:18 time: 0.1510 data: 0.0409 max mem: 2851 +extract (train) [200/289] eta: 0:00:14 time: 0.1626 data: 0.0450 max mem: 2851 +extract (train) [220/289] eta: 0:00:11 time: 0.1618 data: 0.0448 max mem: 2851 +extract (train) [240/289] eta: 0:00:08 time: 0.1402 data: 0.0355 max mem: 2851 +extract (train) [260/289] eta: 0:00:04 time: 0.1695 data: 0.0486 max mem: 2851 +extract (train) [280/289] eta: 0:00:01 time: 0.1442 data: 0.0369 max mem: 2851 +extract (train) [288/289] eta: 0:00:00 time: 0.1389 data: 0.0348 max mem: 2851 +extract (train) Total time: 0:00:47 (0.1640 s / it) +extract (validation) [ 0/62] eta: 0:03:57 time: 3.8324 data: 3.6282 max mem: 2851 +extract (validation) [20/62] eta: 0:00:15 time: 0.2019 data: 0.0561 max mem: 2851 +extract (validation) [40/62] eta: 0:00:05 time: 0.1622 data: 0.0424 max mem: 2851 +extract (validation) [60/62] eta: 0:00:00 time: 0.1376 data: 0.0342 max mem: 2851 +extract (validation) [61/62] eta: 0:00:00 time: 0.1381 data: 0.0345 max mem: 2851 +extract (validation) Total time: 0:00:14 (0.2306 s / it) +extract (test) [ 0/62] eta: 0:03:51 time: 3.7271 data: 3.5164 max mem: 2851 +extract (test) [20/62] eta: 0:00:16 time: 0.2174 data: 0.0644 max mem: 2851 +extract (test) [40/62] eta: 0:00:05 time: 0.1467 data: 0.0363 max mem: 2851 +extract (test) [60/62] eta: 0:00:00 time: 0.1386 data: 0.0354 max mem: 2851 +extract (test) [61/62] eta: 0:00:00 time: 0.1384 data: 0.0354 max mem: 2851 +extract (test) Total time: 0:00:14 (0.2290 s / it) +feature extraction time: 0:01:16 +train features: (578, 768) +validation features: (124, 768) +test features: (124, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|---------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | abide_dx | | 0.046416 | train | 0.80769 | 0.015023 | 0.80469 | 0.015309 | 0.80356 | 0.015319 | +| flat_mae | patch | logistic | abide_dx | | 0.046416 | test | 0.64516 | 0.043225 | 0.64284 | 0.043592 | 0.64284 | 0.043448 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 1, "C": 2.782559402207126, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.04315406600902564, "f1": 0.6418067226890756, "f1_std": 0.043853897294788585, "bacc": 0.6418067226890756, "bacc_std": 0.04374980580424382} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 2, "C": 0.3593813663804626, "split": "test", "acc": 0.6612903225806451, "acc_std": 0.04283227215524843, "f1": 0.6569169960474308, "f1_std": 0.043587852597675246, "bacc": 0.6565126050420168, "bacc_std": 0.04327851883971466} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 3, "C": 10000.0, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04284558339723388, "f1": 0.6035753898349319, "f1_std": 0.042888159933636594, "bacc": 0.6050420168067226, "bacc_std": 0.04271828205266412} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 4, "C": 0.3593813663804626, "split": "test", "acc": 0.7258064516129032, "acc_std": 0.041557026751496036, "f1": 0.7246603970741902, "f1_std": 0.04177510511188718, "bacc": 0.7263655462184874, "bacc_std": 0.04191788348324183} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 5, "C": 0.046415888336127774, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.0443420626761152, "f1": 0.6428384393820372, "f1_std": 0.044669201076798955, "bacc": 0.6433823529411764, "bacc_std": 0.04481200603584851} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 6, "C": 0.046415888336127774, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.047147189101435234, "f1": 0.5989703649924097, "f1_std": 0.047699611889035626, "bacc": 0.5987394957983193, "bacc_std": 0.04727771312706628} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 7, "C": 0.3593813663804626, "split": "test", "acc": 0.6935483870967742, "acc_std": 0.04272298727897597, "f1": 0.6869519000797236, "f1_std": 0.04397862950558822, "bacc": 0.6859243697478992, "bacc_std": 0.04328323320618852} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 8, "C": 1291.5496650148827, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04701133157618572, "f1": 0.5941345902068604, "f1_std": 0.04750201782040206, "bacc": 0.5945378151260504, "bacc_std": 0.04751588633058962} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 9, "C": 10000.0, "split": "test", "acc": 0.6612903225806451, "acc_std": 0.04209484299117427, "f1": 0.6580882352941176, "f1_std": 0.04259248340526784, "bacc": 0.6580882352941176, "bacc_std": 0.042533219587874434} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 10, "C": 2.782559402207126, "split": "test", "acc": 0.6854838709677419, "acc_std": 0.042173629016143015, "f1": 0.6829891838741396, "f1_std": 0.042365119017347845, "bacc": 0.6832983193277311, "bacc_std": 0.04213584580354409} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 11, "C": 2.782559402207126, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.0419770265696058, "f1": 0.5588932806324111, "f1_std": 0.04231845519430366, "bacc": 0.5588235294117647, "bacc_std": 0.04202422171966164} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 12, "C": 0.3593813663804626, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.04517915545258621, "f1": 0.5703170970905524, "f1_std": 0.04534129572610806, "bacc": 0.5709033613445378, "bacc_std": 0.04531358437311203} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 13, "C": 166.81005372000556, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.042504208030642976, "f1": 0.5886829268292683, "f1_std": 0.04256958207030331, "bacc": 0.5934873949579832, "bacc_std": 0.04285493145967109} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 14, "C": 21.54434690031882, "split": "test", "acc": 0.6774193548387096, "acc_std": 0.041448823177117476, "f1": 0.6732542819499341, "f1_std": 0.04249841731098369, "bacc": 0.6727941176470589, "bacc_std": 0.04234986706186723} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 15, "C": 0.046415888336127774, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04128413512107416, "f1": 0.6063492063492064, "f1_std": 0.04215004040000179, "bacc": 0.60609243697479, "bacc_std": 0.04168581841441776} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 16, "C": 0.3593813663804626, "split": "test", "acc": 0.6612903225806451, "acc_std": 0.04215580769939964, "f1": 0.6569169960474308, "f1_std": 0.042888327361309195, "bacc": 0.6565126050420168, "bacc_std": 0.04270704596111274} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 17, "C": 0.3593813663804626, "split": "test", "acc": 0.6854838709677419, "acc_std": 0.03763943553216522, "f1": 0.6761968530297957, "f1_std": 0.039557096088534305, "bacc": 0.6754201680672269, "bacc_std": 0.03872134417024648} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 18, "C": 166.81005372000556, "split": "test", "acc": 0.6612903225806451, "acc_std": 0.04127916620954516, "f1": 0.6522435897435898, "f1_std": 0.0432351627213289, "bacc": 0.6517857142857143, "bacc_std": 0.04216105400839574} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 19, "C": 0.046415888336127774, "split": "test", "acc": 0.6693548387096774, "acc_std": 0.04139282576696095, "f1": 0.6553454003118433, "f1_std": 0.04473095336025959, "bacc": 0.6559873949579832, "bacc_std": 0.042654001070094476} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 20, "C": 0.046415888336127774, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.04198490883924048, "f1": 0.6118548118548119, "f1_std": 0.04341000539334493, "bacc": 0.6118697478991597, "bacc_std": 0.04262800165133694} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 21, "C": 0.046415888336127774, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.04482304205266927, "f1": 0.6179613241560145, "f1_std": 0.045105198254835395, "bacc": 0.618172268907563, "bacc_std": 0.044950100375050754} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 22, "C": 0.3593813663804626, "split": "test", "acc": 0.6854838709677419, "acc_std": 0.040528413357569265, "f1": 0.6808131476470201, "f1_std": 0.04155361725552617, "bacc": 0.6801470588235294, "bacc_std": 0.041286066366324106} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 23, "C": 0.3593813663804626, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.04473473672813193, "f1": 0.6405797101449275, "f1_std": 0.04550981119611706, "bacc": 0.6402310924369747, "bacc_std": 0.045267181402848494} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 24, "C": 1291.5496650148827, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.04364654409749443, "f1": 0.6521171788347361, "f1_std": 0.04366482493733975, "bacc": 0.6538865546218487, "bacc_std": 0.04366658880092021} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 25, "C": 0.3593813663804626, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.04230751539006691, "f1": 0.6530227110040997, "f1_std": 0.042368543502131546, "bacc": 0.6570378151260504, "bacc_std": 0.042451516665635965} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 26, "C": 2.782559402207126, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04605660906203847, "f1": 0.6035753898349319, "f1_std": 0.046211346877024106, "bacc": 0.6050420168067226, "bacc_std": 0.04645532524166727} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 27, "C": 0.3593813663804626, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04488951635359795, "f1": 0.5929621848739496, "f1_std": 0.045110436897883986, "bacc": 0.5929621848739496, "bacc_std": 0.045099040217754106} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 28, "C": 2.782559402207126, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.04399447747009622, "f1": 0.6351748937561295, "f1_std": 0.04423286606922299, "bacc": 0.6360294117647058, "bacc_std": 0.044260688416216436} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 29, "C": 2.782559402207126, "split": "test", "acc": 0.6693548387096774, "acc_std": 0.04133937044013971, "f1": 0.6688163637548042, "f1_std": 0.041428873793280654, "bacc": 0.6717436974789917, "bacc_std": 0.041605944231173975} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 30, "C": 0.3593813663804626, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.041810418345264555, "f1": 0.6480760345851759, "f1_std": 0.04282099136622034, "bacc": 0.6475840336134454, "bacc_std": 0.04241237638858726} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 31, "C": 10000.0, "split": "test", "acc": 0.5483870967741935, "acc_std": 0.044143422832812404, "f1": 0.5386659580122243, "f1_std": 0.04490701653975853, "bacc": 0.539390756302521, "bacc_std": 0.04425953469133911} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 32, "C": 2.782559402207126, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.043603310645770825, "f1": 0.6191239316239316, "f1_std": 0.044692035161611784, "bacc": 0.6192226890756303, "bacc_std": 0.043857318601906746} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 33, "C": 0.005994842503189409, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.04108751541523139, "f1": 0.6217205613178767, "f1_std": 0.04417873017802807, "bacc": 0.6234243697478992, "bacc_std": 0.042146266877715666} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 34, "C": 0.3593813663804626, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.04349792775572952, "f1": 0.5588932806324111, "f1_std": 0.044009632162743506, "bacc": 0.5588235294117647, "bacc_std": 0.04379720389710001} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 35, "C": 0.3593813663804626, "split": "test", "acc": 0.6612903225806451, "acc_std": 0.04317668128112225, "f1": 0.6569169960474308, "f1_std": 0.04388080405654146, "bacc": 0.6565126050420168, "bacc_std": 0.04358627245822747} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 36, "C": 0.3593813663804626, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.04330260390187433, "f1": 0.6227513227513227, "f1_std": 0.04445636638127347, "bacc": 0.6223739495798319, "bacc_std": 0.04404975031221714} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 37, "C": 0.046415888336127774, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.04255647835707375, "f1": 0.6242424242424243, "f1_std": 0.04344865824041677, "bacc": 0.6239495798319328, "bacc_std": 0.04317765453077027} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 38, "C": 21.54434690031882, "split": "test", "acc": 0.6935483870967742, "acc_std": 0.04208442533003155, "f1": 0.6869519000797236, "f1_std": 0.0433402465549648, "bacc": 0.6859243697478992, "bacc_std": 0.042839182817647134} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 39, "C": 0.046415888336127774, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04213123975345814, "f1": 0.5860042735042735, "f1_std": 0.04339517591280546, "bacc": 0.5866596638655462, "bacc_std": 0.04251203404481154} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 40, "C": 0.046415888336127774, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.042842863187684196, "f1": 0.6049081418208935, "f1_std": 0.04630340262026788, "bacc": 0.6071428571428572, "bacc_std": 0.044003501244538744} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 41, "C": 21.54434690031882, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.04056280378608651, "f1": 0.5766806722689075, "f1_std": 0.04119806875233372, "bacc": 0.5766806722689075, "bacc_std": 0.04100330391895227} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 42, "C": 2.782559402207126, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.04538348565316704, "f1": 0.5788923719958203, "f1_std": 0.04549809376664079, "bacc": 0.5798319327731092, "bacc_std": 0.045594096939506494} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 43, "C": 166.81005372000556, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.041061801471226685, "f1": 0.6167554415729598, "f1_std": 0.04173116621886512, "bacc": 0.6165966386554622, "bacc_std": 0.04165952078991606} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 44, "C": 0.046415888336127774, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.044410378716839984, "f1": 0.5623043623043623, "f1_std": 0.045657614767207845, "bacc": 0.5630252100840336, "bacc_std": 0.0447917483011613} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 45, "C": 0.046415888336127774, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.040768821103798765, "f1": 0.6118548118548119, "f1_std": 0.042217892175366895, "bacc": 0.6118697478991597, "bacc_std": 0.04132544718510285} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 46, "C": 0.000774263682681127, "split": "test", "acc": 0.5403225806451613, "acc_std": 0.04253791538824539, "f1": 0.5174438451560046, "f1_std": 0.044847394023006776, "bacc": 0.5241596638655462, "bacc_std": 0.04273646297939909} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 47, "C": 0.3593813663804626, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.04199716307747072, "f1": 0.6317074780542539, "f1_std": 0.0427370529042198, "bacc": 0.6313025210084033, "bacc_std": 0.042378926557957046} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 48, "C": 0.046415888336127774, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.04151339690736908, "f1": 0.6301451580831179, "f1_std": 0.04271308281199993, "bacc": 0.6297268907563025, "bacc_std": 0.042062292330763015} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 49, "C": 0.005994842503189409, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.0436074002779464, "f1": 0.6137071651090342, "f1_std": 0.044923908723406784, "bacc": 0.6134453781512605, "bacc_std": 0.04434425857556162} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 50, "C": 0.046415888336127774, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04724037707095966, "f1": 0.5880946053680574, "f1_std": 0.048620839071026385, "bacc": 0.5882352941176471, "bacc_std": 0.04785585443813488} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 51, "C": 0.3593813663804626, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.045006693917666776, "f1": 0.5616653574234092, "f1_std": 0.045693244711812256, "bacc": 0.5619747899159664, "bacc_std": 0.04573902997715608} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 52, "C": 21.54434690031882, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.04131049154583597, "f1": 0.5808311791608669, "f1_std": 0.04237888167746313, "bacc": 0.5808823529411764, "bacc_std": 0.04182519286053294} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 53, "C": 0.046415888336127774, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.0442763058430251, "f1": 0.6418067226890756, "f1_std": 0.044582028371541904, "bacc": 0.6418067226890756, "bacc_std": 0.04444034586917256} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 54, "C": 21.54434690031882, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.042655420658463415, "f1": 0.6405797101449275, "f1_std": 0.04336799328094336, "bacc": 0.6402310924369747, "bacc_std": 0.04317404935859212} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 55, "C": 166.81005372000556, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.04710709122525476, "f1": 0.5796610169491525, "f1_std": 0.04720669215907874, "bacc": 0.58140756302521, "bacc_std": 0.04716689954868565} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 56, "C": 0.005994842503189409, "split": "test", "acc": 0.717741935483871, "acc_std": 0.04074938953086779, "f1": 0.710955710955711, "f1_std": 0.04234767929232972, "bacc": 0.7095588235294117, "bacc_std": 0.04165388557120636} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 57, "C": 0.005994842503189409, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.04221403764135138, "f1": 0.6145945945945945, "f1_std": 0.044986439868740555, "bacc": 0.6160714285714286, "bacc_std": 0.0431033548379039} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 58, "C": 0.3593813663804626, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.04345151711079797, "f1": 0.5854473942969518, "f1_std": 0.043649732173370655, "bacc": 0.585609243697479, "bacc_std": 0.043616131507200576} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 59, "C": 0.046415888336127774, "split": "test", "acc": 0.6612903225806451, "acc_std": 0.043948354421975616, "f1": 0.6590730557737627, "f1_std": 0.04434662818021113, "bacc": 0.6596638655462186, "bacc_std": 0.044411496087689274} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 60, "C": 0.3593813663804626, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.04285740944369791, "f1": 0.5571428571428572, "f1_std": 0.044257011616676105, "bacc": 0.5572478991596639, "bacc_std": 0.04359402238435234} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 61, "C": 0.046415888336127774, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.04275506797642934, "f1": 0.6153389215233318, "f1_std": 0.043364271345563546, "bacc": 0.6150210084033614, "bacc_std": 0.04303148828429284} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 62, "C": 0.3593813663804626, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.044591380995715954, "f1": 0.6092436974789917, "f1_std": 0.04496618195515294, "bacc": 0.6092436974789917, "bacc_std": 0.04496221435903084} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 63, "C": 0.046415888336127774, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.04251364474605974, "f1": 0.6513893429225237, "f1_std": 0.042828933961936426, "bacc": 0.6523109243697479, "bacc_std": 0.04286573426562781} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 64, "C": 0.3593813663804626, "split": "test", "acc": 0.6693548387096774, "acc_std": 0.04162876354414529, "f1": 0.6667322189446083, "f1_std": 0.042027284733257224, "bacc": 0.6670168067226891, "bacc_std": 0.04201562204212226} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 65, "C": 10000.0, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04322006160669094, "f1": 0.5953379953379954, "f1_std": 0.04410923997215498, "bacc": 0.5955882352941176, "bacc_std": 0.04338512030311183} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 66, "C": 0.046415888336127774, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.04442157735672521, "f1": 0.5616653574234092, "f1_std": 0.04464031303680255, "bacc": 0.5619747899159664, "bacc_std": 0.04462763413214361} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 67, "C": 21.54434690031882, "split": "test", "acc": 0.6774193548387096, "acc_std": 0.0440312861762055, "f1": 0.6760710553814002, "f1_std": 0.044205117682407334, "bacc": 0.6775210084033614, "bacc_std": 0.044376538753704724} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 68, "C": 0.046415888336127774, "split": "test", "acc": 0.6774193548387096, "acc_std": 0.03988968920524124, "f1": 0.6766623207301173, "f1_std": 0.03985090955607941, "bacc": 0.6790966386554622, "bacc_std": 0.0396630403764} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 69, "C": 0.005994842503189409, "split": "test", "acc": 0.6935483870967742, "acc_std": 0.03918475608270628, "f1": 0.6744957170489085, "f1_std": 0.04368269694370768, "bacc": 0.6764705882352942, "bacc_std": 0.040576937504884024} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 70, "C": 0.046415888336127774, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.043387429546631266, "f1": 0.6448884448884449, "f1_std": 0.04505532105085936, "bacc": 0.6444327731092437, "bacc_std": 0.044272195026281394} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 71, "C": 0.3593813663804626, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04184497366494297, "f1": 0.5836690840719849, "f1_std": 0.04386788504814251, "bacc": 0.5850840336134454, "bacc_std": 0.042524133223668825} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 72, "C": 0.046415888336127774, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.04588841769174245, "f1": 0.5643931861867832, "f1_std": 0.04710866956910183, "bacc": 0.5646008403361344, "bacc_std": 0.04645143615475117} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 73, "C": 0.046415888336127774, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.04054969587540614, "f1": 0.5503626107977437, "f1_std": 0.042893102688721346, "bacc": 0.5525210084033614, "bacc_std": 0.04130253269606253} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 74, "C": 166.81005372000556, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.04411038501982605, "f1": 0.6532032520325204, "f1_std": 0.044142369111108797, "bacc": 0.6586134453781513, "bacc_std": 0.044005495943674026} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 75, "C": 2.782559402207126, "split": "test", "acc": 0.6612903225806451, "acc_std": 0.0405679341855003, "f1": 0.6569169960474308, "f1_std": 0.04136011726842881, "bacc": 0.6565126050420168, "bacc_std": 0.04115062979413037} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 76, "C": 0.3593813663804626, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.04510328936739077, "f1": 0.5752305665349143, "f1_std": 0.04580812510554027, "bacc": 0.5751050420168067, "bacc_std": 0.04547954213225819} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 77, "C": 166.81005372000556, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.04299082737391135, "f1": 0.5603991596638656, "f1_std": 0.04326951671349658, "bacc": 0.5603991596638656, "bacc_std": 0.04316104476965729} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 78, "C": 1291.5496650148827, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.04107344757545451, "f1": 0.6428384393820372, "f1_std": 0.041249545666987304, "bacc": 0.6433823529411764, "bacc_std": 0.041261189041600414} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 79, "C": 21.54434690031882, "split": "test", "acc": 0.5564516129032258, "acc_std": 0.045159939495926636, "f1": 0.5529334644378892, "f1_std": 0.04539355083569875, "bacc": 0.553046218487395, "bacc_std": 0.04537683092150109} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 80, "C": 0.3593813663804626, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.04332559497595863, "f1": 0.6532032520325204, "f1_std": 0.043432073821704775, "bacc": 0.6586134453781513, "bacc_std": 0.04308902404950235} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 81, "C": 2.782559402207126, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.04079317647129364, "f1": 0.6330637206549615, "f1_std": 0.041215956274162385, "bacc": 0.6328781512605042, "bacc_std": 0.04095087611731206} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 82, "C": 0.005994842503189409, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.04341803669570844, "f1": 0.6169755573462261, "f1_std": 0.045335082058146015, "bacc": 0.6176470588235294, "bacc_std": 0.044038023385562675} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 83, "C": 0.046415888336127774, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.0443571143111272, "f1": 0.6428384393820372, "f1_std": 0.04490256131615662, "bacc": 0.6433823529411764, "bacc_std": 0.045040070878474565} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 84, "C": 0.3593813663804626, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.04402989477292537, "f1": 0.5644028103044496, "f1_std": 0.044115388326059705, "bacc": 0.5682773109243697, "bacc_std": 0.04407313786201485} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 85, "C": 0.3593813663804626, "split": "test", "acc": 0.6612903225806451, "acc_std": 0.0425775261565397, "f1": 0.6569169960474308, "f1_std": 0.04339476337921228, "bacc": 0.6565126050420168, "bacc_std": 0.04307600730333967} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 86, "C": 0.046415888336127774, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.04436086762545148, "f1": 0.6227513227513227, "f1_std": 0.045774167867267915, "bacc": 0.6223739495798319, "bacc_std": 0.045247274014398466} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 87, "C": 0.3593813663804626, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.04302938944348725, "f1": 0.6368842324461508, "f1_std": 0.043106527847281376, "bacc": 0.6407563025210083, "bacc_std": 0.04305366586812725} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 88, "C": 2.782559402207126, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04332706903946386, "f1": 0.5915678524374176, "f1_std": 0.04404242351930042, "bacc": 0.5913865546218487, "bacc_std": 0.0437405872101056} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 89, "C": 0.000774263682681127, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.042055980729124004, "f1": 0.5740553647201454, "f1_std": 0.04408184847415813, "bacc": 0.576155462184874, "bacc_std": 0.04251462397472721} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 90, "C": 2.782559402207126, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.04405455833738171, "f1": 0.6118548118548119, "f1_std": 0.04540958284168729, "bacc": 0.6118697478991597, "bacc_std": 0.044574256684535817} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 91, "C": 2.782559402207126, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.042496161912757444, "f1": 0.58994708994709, "f1_std": 0.043359028820421676, "bacc": 0.5898109243697479, "bacc_std": 0.04300432856990718} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 92, "C": 2.782559402207126, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.043205146723902264, "f1": 0.6004471624909581, "f1_std": 0.04339853860387639, "bacc": 0.6003151260504203, "bacc_std": 0.04313197555109993} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 93, "C": 0.005994842503189409, "split": "test", "acc": 0.5564516129032258, "acc_std": 0.04514774854576142, "f1": 0.5376584638329605, "f1_std": 0.04768133916298761, "bacc": 0.542016806722689, "bacc_std": 0.04567978406006964} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 94, "C": 0.046415888336127774, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.044076047586684576, "f1": 0.6189604445897352, "f1_std": 0.04447211580082471, "bacc": 0.6197478991596639, "bacc_std": 0.044591352531199974} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 95, "C": 0.046415888336127774, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.04333056636220568, "f1": 0.6448884448884449, "f1_std": 0.04522132117033592, "bacc": 0.6444327731092437, "bacc_std": 0.04435618201376702} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 96, "C": 0.3593813663804626, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.04540522828334925, "f1": 0.6351748937561295, "f1_std": 0.04555807068059736, "bacc": 0.6360294117647058, "bacc_std": 0.04550355303189617} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 97, "C": 0.046415888336127774, "split": "test", "acc": 0.6774193548387096, "acc_std": 0.041961356782870045, "f1": 0.6743697478991597, "f1_std": 0.04252339113435198, "bacc": 0.6743697478991597, "bacc_std": 0.04252518320339834} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 98, "C": 0.046415888336127774, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.04488942073197699, "f1": 0.5643931861867832, "f1_std": 0.04610272171366664, "bacc": 0.5646008403361344, "bacc_std": 0.04552168369616218} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 99, "C": 0.046415888336127774, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.04165320316646794, "f1": 0.6513893429225237, "f1_std": 0.041771605536080954, "bacc": 0.6523109243697479, "bacc_std": 0.04174381092633018} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 100, "C": 0.046415888336127774, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.04270227922962411, "f1": 0.6191239316239316, "f1_std": 0.04398560197563504, "bacc": 0.6192226890756303, "bacc_std": 0.0430599411110248} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | abide_dx | train | 100 | 450.76 | 1971.7 | 0.89679 | 0.092351 | 0.89472 | 0.094941 | 0.8941 | 0.095465 | +| flat_mae | patch | logistic | abide_dx | test | 100 | 450.76 | 1971.7 | 0.62548 | 0.040982 | 0.61966 | 0.042188 | 0.62034 | 0.041933 | + + +done! total time: 0:05:29 diff --git a/data_scaling/n1600_1/eval_v2/adhd200_dx__patch__logistic/config.yaml b/data_scaling/n1600_1/eval_v2/adhd200_dx__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..639faaecd84f95872b798055906dcf028d1a89e5 --- /dev/null +++ b/data_scaling/n1600_1/eval_v2/adhd200_dx__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n1600_1; eval v2 (adhd200_dx patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n1600_1/eval_v2/adhd200_dx__patch__logistic +model: flat_mae +representation: patch +dataset: adhd200_dx +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/adhd200_dx__patch__logistic +remote_dir: null diff --git a/data_scaling/n1600_1/eval_v2/adhd200_dx__patch__logistic/eval_table.csv b/data_scaling/n1600_1/eval_v2/adhd200_dx__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..2d8aa0a21550feb4f58144774902360fd47e57fe --- /dev/null +++ b/data_scaling/n1600_1/eval_v2/adhd200_dx__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,adhd200_dx,,0.005994842503189409,train,0.7589041095890411,0.02162984531369189,0.7504971414367387,0.022650277789996946,0.7476644074006228,0.02240074777954313 +flat_mae,patch,logistic,adhd200_dx,,0.005994842503189409,test,0.6307692307692307,0.05858904314230914,0.6036585365853658,0.06590209331528313,0.6061776061776062,0.06126223390273478 +flat_mae,patch,logistic,adhd200_dx,1,0.005994842503189409,train,0.7561643835616438,0.022925557893513564,0.7473969875817451,0.02416544021820604,0.7445197533125725,0.02386885555721984 +flat_mae,patch,logistic,adhd200_dx,1,0.005994842503189409,test,0.6,0.05687315898574434,0.5775,0.060290791754569605,0.5791505791505791,0.057933054764911535 +flat_mae,patch,logistic,adhd200_dx,2,0.046415888336127774,train,0.8410958904109589,0.018636820718883028,0.8371237766972364,0.019248626202751556,0.8348293338218233,0.01934782898142414 +flat_mae,patch,logistic,adhd200_dx,2,0.046415888336127774,test,0.6461538461538462,0.05578402138085886,0.6289401836684041,0.05968094560415457,0.6283783783783784,0.05752966114564144 +flat_mae,patch,logistic,adhd200_dx,3,0.000774263682681127,train,0.7150684931506849,0.02315190075432657,0.7025078369905956,0.02476569283051763,0.7002198204799414,0.024162319379137966 +flat_mae,patch,logistic,adhd200_dx,3,0.000774263682681127,test,0.5692307692307692,0.05977697405539139,0.5512820512820513,0.062463142528234775,0.5521235521235521,0.06089063844054056 +flat_mae,patch,logistic,adhd200_dx,4,0.046415888336127774,train,0.8301369863013699,0.018462695572189085,0.8258909337108389,0.0190447511645191,0.8236856567136839,0.01910404001322068 +flat_mae,patch,logistic,adhd200_dx,4,0.046415888336127774,test,0.6461538461538462,0.06090439496000094,0.6407113674597452,0.06188663055524596,0.6414092664092663,0.06189718308359479 +flat_mae,patch,logistic,adhd200_dx,5,0.005994842503189409,train,0.7863013698630137,0.0219274903272363,0.7783814920911696,0.02326255501859857,0.7748061305489405,0.02305143352540494 +flat_mae,patch,logistic,adhd200_dx,5,0.005994842503189409,test,0.5076923076923077,0.06250698611842893,0.5066413662239089,0.06284448377491744,0.5111003861003861,0.06397993758826467 +flat_mae,patch,logistic,adhd200_dx,6,0.005994842503189409,train,0.7589041095890411,0.020167488593572813,0.7482758620689656,0.021449852779127233,0.7447945289124992,0.020994357344972393 +flat_mae,patch,logistic,adhd200_dx,6,0.005994842503189409,test,0.6461538461538462,0.05822523091231928,0.6336682185738789,0.060916278468128957,0.6327220077220077,0.05971745127252575 +flat_mae,patch,logistic,adhd200_dx,7,0.046415888336127774,train,0.8383561643835616,0.019604453367943794,0.8332287867171588,0.020514393549752385,0.8295322708676803,0.020603003219608954 +flat_mae,patch,logistic,adhd200_dx,7,0.046415888336127774,test,0.6,0.05940116150519012,0.5921814671814671,0.06059235440050431,0.5921814671814671,0.060227089811499164 +flat_mae,patch,logistic,adhd200_dx,8,0.005994842503189409,train,0.7643835616438356,0.020906770867779794,0.754566210045662,0.022409143392165008,0.7510838370885998,0.022052900561833257 +flat_mae,patch,logistic,adhd200_dx,8,0.005994842503189409,test,0.7076923076923077,0.05772658673928373,0.6934723256391164,0.06203714844494515,0.6911196911196911,0.06015470888144337 +flat_mae,patch,logistic,adhd200_dx,9,0.046415888336127774,train,0.8602739726027397,0.018212672440306558,0.8563934426229508,0.018970280647007814,0.853254564328021,0.0191908264406227 +flat_mae,patch,logistic,adhd200_dx,9,0.046415888336127774,test,0.5692307692307692,0.06393099830581951,0.564176245210728,0.06450444271486197,0.5651544401544402,0.06476597677360901 +flat_mae,patch,logistic,adhd200_dx,10,0.005994842503189409,train,0.7671232876712328,0.022390987517562383,0.7587499319600937,0.02355268165755083,0.7556634304207119,0.02327864405728499 +flat_mae,patch,logistic,adhd200_dx,10,0.005994842503189409,test,0.6,0.05866893576390013,0.5833333333333333,0.06148898240968189,0.5834942084942085,0.05980521627926769 +flat_mae,patch,logistic,adhd200_dx,11,0.005994842503189409,train,0.7671232876712328,0.0214067339103511,0.7559795817242274,0.023017054075801593,0.7520760823105574,0.022473123630505965 +flat_mae,patch,logistic,adhd200_dx,11,0.005994842503189409,test,0.5846153846153846,0.057128204207521974,0.5699583435432491,0.059513438420802985,0.5699806949806949,0.058284035366820204 +flat_mae,patch,logistic,adhd200_dx,12,0.046415888336127774,train,0.8301369863013699,0.019730303189559987,0.8255796029103466,0.02045577582096023,0.822968187091653,0.020552217422756643 +flat_mae,patch,logistic,adhd200_dx,12,0.046415888336127774,test,0.5692307692307692,0.058593300907549176,0.5512820512820513,0.061481823260508384,0.5521235521235521,0.05952379381804377 +flat_mae,patch,logistic,adhd200_dx,13,0.005994842503189409,train,0.7506849315068493,0.022662220780490527,0.7417205153925708,0.023672216220123728,0.7389479147585027,0.023317332549040733 +flat_mae,patch,logistic,adhd200_dx,13,0.005994842503189409,test,0.6615384615384615,0.05620752755500062,0.6575670498084292,0.05687973184477358,0.6592664092664093,0.05712165929996483 +flat_mae,patch,logistic,adhd200_dx,14,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,14,166.81005372000556,test,0.5538461538461539,0.062257484044302876,0.5521501544309813,0.062437789287548354,0.555984555984556,0.06253361281411537 +flat_mae,patch,logistic,adhd200_dx,15,0.005994842503189409,train,0.7671232876712328,0.02227586707810587,0.7592516431414847,0.023520655306265983,0.7563809000427428,0.02333294069470713 +flat_mae,patch,logistic,adhd200_dx,15,0.005994842503189409,test,0.5692307692307692,0.06079342069665104,0.5608108108108107,0.06159096857721931,0.5608108108108107,0.061467094643038345 +flat_mae,patch,logistic,adhd200_dx,16,0.005994842503189409,train,0.7698630136986301,0.021053913761312725,0.7641102972856528,0.02164896418043694,0.7623954326189167,0.021560376937272713 +flat_mae,patch,logistic,adhd200_dx,16,0.005994842503189409,test,0.6307692307692307,0.061070528632521444,0.6235521235521235,0.06221801201152397,0.6235521235521235,0.06208149045894029 +flat_mae,patch,logistic,adhd200_dx,17,0.005994842503189409,train,0.7534246575342466,0.01994223295627691,0.7442863370282725,0.02103078270190035,0.7413750992245222,0.020744133836917453 +flat_mae,patch,logistic,adhd200_dx,17,0.005994842503189409,test,0.6,0.05686653324715949,0.570630081300813,0.06270939294419887,0.5748069498069498,0.058517693114287214 +flat_mae,patch,logistic,adhd200_dx,18,0.005994842503189409,train,0.7315068493150685,0.022184609022088683,0.7203196347031964,0.02354800550289395,0.7176528057641814,0.023017235055765185 +flat_mae,patch,logistic,adhd200_dx,18,0.005994842503189409,test,0.6461538461538462,0.05622191027763351,0.6233308138070043,0.06167850873617281,0.6240347490347491,0.05819478120036686 +flat_mae,patch,logistic,adhd200_dx,19,0.046415888336127774,train,0.8136986301369863,0.0206854388035407,0.8087002096436059,0.021444585698738732,0.8062526714294437,0.021461450946513475 +flat_mae,patch,logistic,adhd200_dx,19,0.046415888336127774,test,0.676923076923077,0.05866280335136926,0.6719538572458543,0.060002484258066535,0.6727799227799228,0.060091977733538006 +flat_mae,patch,logistic,adhd200_dx,20,0.005994842503189409,train,0.7561643835616438,0.022320462360129897,0.7484298647089345,0.023260075400230635,0.7459546925566343,0.023018048470305696 +flat_mae,patch,logistic,adhd200_dx,20,0.005994842503189409,test,0.6461538461538462,0.05351320359107538,0.6336682185738789,0.05682691420110582,0.6327220077220077,0.055511956321264475 +flat_mae,patch,logistic,adhd200_dx,21,0.005994842503189409,train,0.7424657534246575,0.022063403407862413,0.7304707139265962,0.02376662418517162,0.7273615436282591,0.023134909393289924 +flat_mae,patch,logistic,adhd200_dx,21,0.005994842503189409,test,0.6923076923076923,0.06106820321400477,0.6904761904761905,0.06137632898779881,0.6949806949806949,0.061370674064897296 +flat_mae,patch,logistic,adhd200_dx,22,0.005994842503189409,train,0.7534246575342466,0.021405172094176438,0.7437277663358921,0.02270735515831169,0.7406576296024913,0.022389879103219507 +flat_mae,patch,logistic,adhd200_dx,22,0.005994842503189409,test,0.6307692307692307,0.06122946268416632,0.6235521235521235,0.06302460190297848,0.6235521235521235,0.06303917241435883 +flat_mae,patch,logistic,adhd200_dx,23,0.005994842503189409,train,0.7643835616438356,0.019938053813418368,0.754566210045662,0.021240407923408835,0.7510838370885998,0.020857304951627267 +flat_mae,patch,logistic,adhd200_dx,23,0.005994842503189409,test,0.6153846153846154,0.055204699225410575,0.5834401435529352,0.06247001238906739,0.5883204633204633,0.05727929666139208 +flat_mae,patch,logistic,adhd200_dx,24,0.005994842503189409,train,0.7452054794520548,0.02287812053741415,0.7354763296317943,0.024126157188354286,0.7326586065824021,0.023716075083636958 +flat_mae,patch,logistic,adhd200_dx,24,0.005994842503189409,test,0.6153846153846154,0.05563814620931819,0.5834401435529352,0.0622450777161757,0.5883204633204633,0.057215029905661484 +flat_mae,patch,logistic,adhd200_dx,25,0.000774263682681127,train,0.6876712328767123,0.02246252633019986,0.6606478762967313,0.02562593037571152,0.6623160530011601,0.023512868354307048 +flat_mae,patch,logistic,adhd200_dx,25,0.000774263682681127,test,0.6461538461538462,0.057996620999307184,0.6233308138070043,0.06403036874752153,0.6240347490347491,0.06045991437518356 +flat_mae,patch,logistic,adhd200_dx,26,0.046415888336127774,train,0.8356164383561644,0.018817666964301347,0.8312060673325934,0.0195101087935275,0.8285400256457227,0.01959129467788434 +flat_mae,patch,logistic,adhd200_dx,26,0.046415888336127774,test,0.5692307692307692,0.05811573028456991,0.5565302144249512,0.060251558916601756,0.5564671814671815,0.05940637368331731 +flat_mae,patch,logistic,adhd200_dx,27,0.000774263682681127,train,0.684931506849315,0.023371822172933855,0.672175318843183,0.024790913087657598,0.6706509128656042,0.024188947129897963 +flat_mae,patch,logistic,adhd200_dx,27,0.000774263682681127,test,0.6153846153846154,0.05703211178930629,0.5966741126830479,0.0608086801229079,0.597007722007722,0.058438101460180844 +flat_mae,patch,logistic,adhd200_dx,28,0.005994842503189409,train,0.7698630136986301,0.020940778983417768,0.7618381804623415,0.02200945732485732,0.7588080845087622,0.02179941557917174 +flat_mae,patch,logistic,adhd200_dx,28,0.005994842503189409,test,0.6153846153846154,0.05356444108839768,0.5905769715293525,0.05807956011402474,0.5926640926640927,0.05509674327527941 +flat_mae,patch,logistic,adhd200_dx,29,0.000774263682681127,train,0.673972602739726,0.02544122983746481,0.6575202050068993,0.02707024475167462,0.6566373572693411,0.026102419613699156 +flat_mae,patch,logistic,adhd200_dx,29,0.000774263682681127,test,0.6,0.05994657187270963,0.5775,0.06495994457602072,0.5791505791505791,0.06171941583500162 +flat_mae,patch,logistic,adhd200_dx,30,0.046415888336127774,train,0.8273972602739726,0.01874217639991864,0.8232363996955929,0.01927340212951314,0.8212584722476644,0.019295741299024928 +flat_mae,patch,logistic,adhd200_dx,30,0.046415888336127774,test,0.6,0.06156106507954367,0.5953065134099617,0.062024495704855576,0.5965250965250966,0.06207337116376116 +flat_mae,patch,logistic,adhd200_dx,31,0.005994842503189409,train,0.7616438356164383,0.022000514502931404,0.7519935020813646,0.023190402366368757,0.7486566526225804,0.022759079876265857 +flat_mae,patch,logistic,adhd200_dx,31,0.005994842503189409,test,0.6153846153846154,0.06090936908722295,0.6018132810585641,0.06398252282586593,0.6013513513513513,0.06267481158636691 +flat_mae,patch,logistic,adhd200_dx,32,0.046415888336127774,train,0.8356164383561644,0.018915766904041294,0.8308932542624166,0.01964320435871451,0.8278225560236918,0.01970788027053829 +flat_mae,patch,logistic,adhd200_dx,32,0.046415888336127774,test,0.5538461538461539,0.0628158122783535,0.5469838981014179,0.06394245999381412,0.5472972972972974,0.06391276428040849 +flat_mae,patch,logistic,adhd200_dx,33,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,33,21.54434690031882,test,0.5384615384615384,0.06125531784667386,0.5383522727272727,0.06167709416969898,0.5511583011583012,0.06157844916591004 +flat_mae,patch,logistic,adhd200_dx,34,0.046415888336127774,train,0.8328767123287671,0.018567084275742742,0.8285474468855161,0.019268050413209645,0.8261128411797033,0.019396403442219125 +flat_mae,patch,logistic,adhd200_dx,34,0.046415888336127774,test,0.6153846153846154,0.060463495760058626,0.6139225469232596,0.0608400465042012,0.6187258687258688,0.06101202760295734 +flat_mae,patch,logistic,adhd200_dx,35,0.005994842503189409,train,0.7534246575342466,0.02186435880093315,0.7413059913059913,0.023518105350461227,0.7377877511143677,0.022831479944904035 +flat_mae,patch,logistic,adhd200_dx,35,0.005994842503189409,test,0.5692307692307692,0.06095454509753426,0.5512820512820513,0.06360584676706754,0.5521235521235521,0.061909132881585074 +flat_mae,patch,logistic,adhd200_dx,36,0.046415888336127774,train,0.8246575342465754,0.019644026670322903,0.8196194712132444,0.020443184941753526,0.8166788789155524,0.02051792774596333 +flat_mae,patch,logistic,adhd200_dx,36,0.046415888336127774,test,0.6307692307692307,0.059029299715074306,0.6198830409356726,0.06151590602643233,0.6192084942084942,0.06038237553808238 +flat_mae,patch,logistic,adhd200_dx,37,0.005994842503189409,train,0.7561643835616438,0.020821271799639798,0.7462922032786373,0.022070191401246227,0.7430848140685107,0.021751647852011273 +flat_mae,patch,logistic,adhd200_dx,37,0.005994842503189409,test,0.6307692307692307,0.05391319782213149,0.6036585365853658,0.060416268683395516,0.6061776061776062,0.05594752349950534 +flat_mae,patch,logistic,adhd200_dx,38,0.000774263682681127,train,0.7150684931506849,0.02426405994452609,0.705736434108527,0.025544881772134383,0.7038071685900958,0.025266831453669376 +flat_mae,patch,logistic,adhd200_dx,38,0.000774263682681127,test,0.5384615384615384,0.05838436201424702,0.5192307692307693,0.06038449040782191,0.5207528957528957,0.058702405925690114 +flat_mae,patch,logistic,adhd200_dx,39,0.000774263682681127,train,0.7013698630136986,0.02202912758146303,0.6771353215662406,0.024880056901265896,0.6773218538193808,0.02303227317807917 +flat_mae,patch,logistic,adhd200_dx,39,0.000774263682681127,test,0.7076923076923077,0.05314475932570381,0.6834145091002307,0.06073000792755282,0.6824324324324325,0.056236247529993295 +flat_mae,patch,logistic,adhd200_dx,40,0.046415888336127774,train,0.8465753424657534,0.018430657270320706,0.8415503875968993,0.01937462209511433,0.8375312938877695,0.019526740532157985 +flat_mae,patch,logistic,adhd200_dx,40,0.046415888336127774,test,0.6923076923076923,0.05805947572871711,0.675,0.06312847423068553,0.6732625482625483,0.06047613315133476 +flat_mae,patch,logistic,adhd200_dx,41,0.005994842503189409,train,0.7506849315068493,0.022395975163164945,0.7417205153925708,0.02362793339369894,0.7389479147585027,0.0233228523674616 +flat_mae,patch,logistic,adhd200_dx,41,0.005994842503189409,test,0.6307692307692307,0.05950246576363591,0.6198830409356726,0.06152396985632353,0.6192084942084942,0.060583424993649715 +flat_mae,patch,logistic,adhd200_dx,42,0.005994842503189409,train,0.7643835616438356,0.021813980790500315,0.7551176433876303,0.022997198166483417,0.7518013067106307,0.022653512415677018 +flat_mae,patch,logistic,adhd200_dx,42,0.005994842503189409,test,0.6,0.06428486897161902,0.5921814671814671,0.06535737067630322,0.5921814671814671,0.06494250595398127 +flat_mae,patch,logistic,adhd200_dx,43,0.005994842503189409,train,0.7616438356164383,0.021914473033917286,0.7530734597709193,0.023155678797241803,0.7500915918666422,0.022908622949762945 +flat_mae,patch,logistic,adhd200_dx,43,0.005994842503189409,test,0.6,0.06062655890754822,0.5833333333333333,0.06277166102990825,0.5834942084942085,0.0613045790725833 +flat_mae,patch,logistic,adhd200_dx,44,0.046415888336127774,train,0.8356164383561644,0.018420952115048784,0.8320758189179243,0.01887874612680856,0.8306924345118154,0.01897220768208655 +flat_mae,patch,logistic,adhd200_dx,44,0.046415888336127774,test,0.6461538461538462,0.055332103310869483,0.6289401836684041,0.05926238341237621,0.6283783783783784,0.05714172376882657 +flat_mae,patch,logistic,adhd200_dx,45,0.005994842503189409,train,0.7479452054794521,0.02249770931397362,0.7374429223744292,0.024016060746782218,0.7343683214263906,0.023589671196991898 +flat_mae,patch,logistic,adhd200_dx,45,0.005994842503189409,test,0.5384615384615384,0.05798623791023391,0.5125,0.061062661875850015,0.5164092664092664,0.05854938121882263 +flat_mae,patch,logistic,adhd200_dx,46,0.000774263682681127,train,0.6684931506849315,0.023024709694606862,0.6470329670329671,0.02549510835321048,0.6474781706051169,0.024032986725083496 +flat_mae,patch,logistic,adhd200_dx,46,0.000774263682681127,test,0.676923076923077,0.054547337891100764,0.656084656084656,0.059836480438581294,0.6554054054054055,0.05645306785801666 +flat_mae,patch,logistic,adhd200_dx,47,0.005994842503189409,train,0.7589041095890411,0.021501846726450648,0.7488584474885844,0.022816798918292223,0.7455119985345301,0.022398110893828076 +flat_mae,patch,logistic,adhd200_dx,47,0.005994842503189409,test,0.5846153846153846,0.06073737042118694,0.5699583435432491,0.0628849453603333,0.5699806949806949,0.06167383296840495 +flat_mae,patch,logistic,adhd200_dx,48,0.005994842503189409,train,0.7123287671232876,0.022632789655136494,0.6992914812984017,0.024239757840759154,0.697075166391891,0.023573543144019427 +flat_mae,patch,logistic,adhd200_dx,48,0.005994842503189409,test,0.7846153846153846,0.04928819727411974,0.7804054054054055,0.05067015258726057,0.7804054054054055,0.050877748827951805 +flat_mae,patch,logistic,adhd200_dx,49,0.005994842503189409,train,0.7698630136986301,0.020998518565552016,0.7608125819134994,0.02226878708738185,0.7573731452647005,0.021962813863927497 +flat_mae,patch,logistic,adhd200_dx,49,0.005994842503189409,test,0.5538461538461539,0.06108239463962789,0.5321419707123356,0.06378864853669464,0.5342664092664092,0.06179659742532306 +flat_mae,patch,logistic,adhd200_dx,50,0.005994842503189409,train,0.7643835616438356,0.021509357850388003,0.7528035028035027,0.02323834929766175,0.7489314282225071,0.022692887909358892 +flat_mae,patch,logistic,adhd200_dx,50,0.005994842503189409,test,0.6615384615384615,0.05332427536493362,0.6366869918699187,0.059724289881772565,0.6375482625482626,0.055570078181126525 +flat_mae,patch,logistic,adhd200_dx,51,0.046415888336127774,train,0.8493150684931506,0.018392929038597357,0.8459431044670744,0.01885781040800901,0.8442632960859743,0.01894339393294757 +flat_mae,patch,logistic,adhd200_dx,51,0.046415888336127774,test,0.5384615384615384,0.06055287679023982,0.5357142857142857,0.06099381124512972,0.5381274131274132,0.061617172930203114 +flat_mae,patch,logistic,adhd200_dx,52,0.046415888336127774,train,0.8410958904109589,0.018716861802771368,0.835891472868217,0.01966586107132552,0.8319594553336997,0.019782478078653933 +flat_mae,patch,logistic,adhd200_dx,52,0.046415888336127774,test,0.6461538461538462,0.05943018578339227,0.6336682185738789,0.06303502621643099,0.6327220077220077,0.061809551996838 +flat_mae,patch,logistic,adhd200_dx,53,0.000774263682681127,train,0.6986301369863014,0.022926364622051644,0.6845933886375063,0.0248678494561409,0.6827868351957013,0.024071633327094978 +flat_mae,patch,logistic,adhd200_dx,53,0.000774263682681127,test,0.5692307692307692,0.06013264233805307,0.5376016260162602,0.06431226753516604,0.5434362934362934,0.060802090322816346 +flat_mae,patch,logistic,adhd200_dx,54,0.005994842503189409,train,0.7589041095890411,0.020360224525492517,0.751976772918211,0.021160058597054124,0.7498168162667155,0.021075472439877466 +flat_mae,patch,logistic,adhd200_dx,54,0.005994842503189409,test,0.6,0.06102415081136747,0.5833333333333333,0.06416582666963139,0.5834942084942085,0.062406250972183025 +flat_mae,patch,logistic,adhd200_dx,55,0.005994842503189409,train,0.7808219178082192,0.021504567368686892,0.7745243390165555,0.022350928725293845,0.7721041704829944,0.0223096783998845 +flat_mae,patch,logistic,adhd200_dx,55,0.005994842503189409,test,0.5076923076923077,0.05939194056057012,0.5047619047619047,0.059576580006898625,0.5067567567567568,0.059868340562345655 +flat_mae,patch,logistic,adhd200_dx,56,0.000774263682681127,train,0.6794520547945205,0.02268216447951339,0.6587012987012988,0.024906370998055517,0.6586218477132564,0.02356594500440253 +flat_mae,patch,logistic,adhd200_dx,56,0.000774263682681127,test,0.6153846153846154,0.061128255893973076,0.606060606060606,0.06283552272328789,0.6056949806949807,0.062374944610876976 +flat_mae,patch,logistic,adhd200_dx,57,0.005994842503189409,train,0.7671232876712328,0.021598437562945216,0.7597363876433645,0.022484617623543687,0.7570983696647737,0.022262220977497266 +flat_mae,patch,logistic,adhd200_dx,57,0.005994842503189409,test,0.5538461538461539,0.05544881748916513,0.5167905665214048,0.06014532054167831,0.5255791505791506,0.05613130894715788 +flat_mae,patch,logistic,adhd200_dx,58,0.005994842503189409,train,0.7726027397260274,0.020729574613380687,0.7644264041492679,0.0218879028604098,0.7612352689747817,0.02170007212869536 +flat_mae,patch,logistic,adhd200_dx,58,0.005994842503189409,test,0.5846153846153846,0.05874986390516781,0.5810455956075435,0.059372108326704684,0.583011583011583,0.059724793915893436 +flat_mae,patch,logistic,adhd200_dx,59,0.005994842503189409,train,0.7616438356164383,0.021421714403882366,0.7519935020813646,0.022701634927415712,0.7486566526225804,0.022315277748537008 +flat_mae,patch,logistic,adhd200_dx,59,0.005994842503189409,test,0.5846153846153846,0.059759803236291174,0.5644080416976918,0.0632717469472277,0.5656370656370656,0.06100607673386791 +flat_mae,patch,logistic,adhd200_dx,60,0.005994842503189409,train,0.7726027397260274,0.021376945787965538,0.7649163103616852,0.02245794604650458,0.7619527385968126,0.022210418367846093 +flat_mae,patch,logistic,adhd200_dx,60,0.005994842503189409,test,0.6461538461538462,0.05481004529362075,0.6233308138070043,0.05981157069366302,0.6240347490347491,0.05670678430018822 +flat_mae,patch,logistic,adhd200_dx,61,0.046415888336127774,train,0.8465753424657534,0.01843611702498398,0.8415503875968993,0.019276052569370494,0.8375312938877695,0.019414735742625422 +flat_mae,patch,logistic,adhd200_dx,61,0.046415888336127774,test,0.6,0.05782605215378539,0.570630081300813,0.06462025082173173,0.5748069498069498,0.06019585099642914 +flat_mae,patch,logistic,adhd200_dx,62,0.005994842503189409,train,0.7506849315068493,0.02232112416466696,0.7432776064491695,0.023293795143210155,0.7411003236245954,0.023161227823797138 +flat_mae,patch,logistic,adhd200_dx,62,0.005994842503189409,test,0.6153846153846154,0.057828303301427526,0.6094688776736361,0.058812633461607276,0.61003861003861,0.0589813180290202 +flat_mae,patch,logistic,adhd200_dx,63,0.005994842503189409,train,0.7479452054794521,0.022652679740337064,0.7411826365766432,0.02347675034453117,0.7393906087806069,0.02340148274780115 +flat_mae,patch,logistic,adhd200_dx,63,0.005994842503189409,test,0.6307692307692307,0.054387050320858096,0.6036585365853658,0.05969664861585038,0.6061776061776062,0.05601578895121886 +flat_mae,patch,logistic,adhd200_dx,64,0.000774263682681127,train,0.6821917808219178,0.022561838626468215,0.6621017684990105,0.025137480206975416,0.6617665018013067,0.02375287889027579 +flat_mae,patch,logistic,adhd200_dx,64,0.000774263682681127,test,0.6461538461538462,0.05765306562821583,0.6289401836684041,0.061598285722530025,0.6283783783783784,0.0592772184233897 +flat_mae,patch,logistic,adhd200_dx,65,0.005994842503189409,train,0.7424657534246575,0.022239968652565688,0.7317351598173516,0.023823503086748347,0.7287964828723209,0.023419024548035353 +flat_mae,patch,logistic,adhd200_dx,65,0.005994842503189409,test,0.6923076923076923,0.05498508888780666,0.675,0.06005182991021162,0.6732625482625483,0.05740138948852155 +flat_mae,patch,logistic,adhd200_dx,66,0.005994842503189409,train,0.7671232876712328,0.022450943330868355,0.7582310539645432,0.0238163839094744,0.754945960798681,0.023494836198817638 +flat_mae,patch,logistic,adhd200_dx,66,0.005994842503189409,test,0.6,0.0627080471040012,0.588206627680312,0.06487930173782207,0.5878378378378378,0.06403553819602813 +flat_mae,patch,logistic,adhd200_dx,67,0.005994842503189409,train,0.7287671232876712,0.02198942477707515,0.7184102863822326,0.02344107964294839,0.7159430909201929,0.02308483182713702 +flat_mae,patch,logistic,adhd200_dx,67,0.005994842503189409,test,0.676923076923077,0.05392555903428136,0.6431372549019607,0.06400365901103103,0.6467181467181468,0.05707649481217139 +flat_mae,patch,logistic,adhd200_dx,68,0.005994842503189409,train,0.7506849315068493,0.021545804699090648,0.7381036861817465,0.02359163176172343,0.7346430970263174,0.022966997062049504 +flat_mae,patch,logistic,adhd200_dx,68,0.005994842503189409,test,0.6923076923076923,0.053392941048851714,0.675,0.05794569711605071,0.6732625482625483,0.05556853055500761 +flat_mae,patch,logistic,adhd200_dx,69,0.000774263682681127,train,0.7013698630136986,0.02228657986230933,0.6892792152513648,0.023644751139805995,0.6873664285278134,0.023104751539245252 +flat_mae,patch,logistic,adhd200_dx,69,0.000774263682681127,test,0.6,0.06039970218195318,0.5833333333333333,0.06309116516474104,0.5834942084942085,0.06120905099252913 +flat_mae,patch,logistic,adhd200_dx,70,0.005994842503189409,train,0.7506849315068493,0.02242754853708657,0.7411650107149814,0.02384914974267918,0.7382304451364718,0.02350963348129445 +flat_mae,patch,logistic,adhd200_dx,70,0.005994842503189409,test,0.5538461538461539,0.06379998330581665,0.543030303030303,0.06592324327788525,0.542953667953668,0.0652426748177374 +flat_mae,patch,logistic,adhd200_dx,71,0.005994842503189409,train,0.7479452054794521,0.022623226249196018,0.7380328278100231,0.02398974134398388,0.7350857910484215,0.023600122096745915 +flat_mae,patch,logistic,adhd200_dx,71,0.005994842503189409,test,0.7076923076923077,0.058899059174129714,0.7031963470319634,0.05994551473859335,0.7041505791505791,0.0598625565211947 +flat_mae,patch,logistic,adhd200_dx,72,0.046415888336127774,train,0.8301369863013699,0.019901420403049704,0.8258909337108389,0.020547135129038396,0.8236856567136839,0.020587738141997658 +flat_mae,patch,logistic,adhd200_dx,72,0.046415888336127774,test,0.5230769230769231,0.06119439197726745,0.5115151515151515,0.06211723943492022,0.5115830115830116,0.0616030794438102 +flat_mae,patch,logistic,adhd200_dx,73,0.000774263682681127,train,0.6958904109589041,0.023214867018059058,0.6805440567711414,0.024920464138923022,0.6789247114856201,0.02406209807910609 +flat_mae,patch,logistic,adhd200_dx,73,0.000774263682681127,test,0.5846153846153846,0.06186528598064876,0.5644080416976918,0.06583988908469038,0.5656370656370656,0.06348978168546354 +flat_mae,patch,logistic,adhd200_dx,74,0.005994842503189409,train,0.736986301369863,0.020808442647622705,0.7278150633855331,0.021870455473137495,0.7253770531843439,0.021600507003421834 +flat_mae,patch,logistic,adhd200_dx,74,0.005994842503189409,test,0.6461538461538462,0.059392586152994115,0.6375757575757576,0.06104786705755178,0.6370656370656371,0.060377216800809694 +flat_mae,patch,logistic,adhd200_dx,75,0.046415888336127774,train,0.8410958904109589,0.018691607693320705,0.8352067505293312,0.01970642273123464,0.830524516089638,0.019712331076839052 +flat_mae,patch,logistic,adhd200_dx,75,0.046415888336127774,test,0.5692307692307692,0.062172266881872405,0.5608108108108107,0.06324153335642566,0.5608108108108107,0.0631161648852578 +flat_mae,patch,logistic,adhd200_dx,76,0.005994842503189409,train,0.7589041095890411,0.02185709055618244,0.747674710910005,0.023397547520884517,0.7440770592904683,0.022850822010948978 +flat_mae,patch,logistic,adhd200_dx,76,0.005994842503189409,test,0.6,0.04725853355831982,0.5305555555555556,0.062246460803050754,0.5574324324324325,0.04961748378615116 +flat_mae,patch,logistic,adhd200_dx,77,0.046415888336127774,train,0.8383561643835616,0.018538682861636194,0.8338669238187078,0.019227785706278352,0.8309672101117421,0.01930060337107804 +flat_mae,patch,logistic,adhd200_dx,77,0.046415888336127774,test,0.7076923076923077,0.05663140241637118,0.7006060606060607,0.05853708579531545,0.6998069498069499,0.058041765351309955 +flat_mae,patch,logistic,adhd200_dx,78,0.046415888336127774,train,0.8246575342465754,0.019660019698726796,0.8202745122176403,0.020246803057527542,0.8181138181596141,0.020275397814463882 +flat_mae,patch,logistic,adhd200_dx,78,0.046415888336127774,test,0.5538461538461539,0.057827787591706106,0.5250692869740489,0.061884788921286356,0.5299227799227799,0.058602122095466114 +flat_mae,patch,logistic,adhd200_dx,79,0.000774263682681127,train,0.7041095890410959,0.023502653775090133,0.6910658307210031,0.02532868110471429,0.6890761433718019,0.024616151754182476 +flat_mae,patch,logistic,adhd200_dx,79,0.000774263682681127,test,0.5692307692307692,0.06097026144822646,0.5608108108108107,0.06235617863777744,0.5608108108108107,0.06192628136374481 +flat_mae,patch,logistic,adhd200_dx,80,0.005994842503189409,train,0.7643835616438356,0.021631979412626873,0.7561676609495401,0.022718406532282697,0.7532362459546925,0.022450870677049563 +flat_mae,patch,logistic,adhd200_dx,80,0.005994842503189409,test,0.6461538461538462,0.061300308405838284,0.6289401836684041,0.06559541599195283,0.6283783783783784,0.06340944381201295 +flat_mae,patch,logistic,adhd200_dx,81,0.3593813663804626,train,0.9671232876712329,0.00954601069217449,0.9664686428221461,0.009778256565747597,0.9651340294315198,0.010245026320384647 +flat_mae,patch,logistic,adhd200_dx,81,0.3593813663804626,test,0.5692307692307692,0.05852624758862244,0.5512820512820513,0.06128784625556384,0.5521235521235521,0.05952112491424185 +flat_mae,patch,logistic,adhd200_dx,82,0.005994842503189409,train,0.7424657534246575,0.021916682162854504,0.7317351598173516,0.02334291956845332,0.7287964828723209,0.022908762424536256 +flat_mae,patch,logistic,adhd200_dx,82,0.005994842503189409,test,0.7384615384615385,0.05421625774203063,0.7362138935306756,0.05485036841489733,0.7398648648648649,0.05504295477455313 +flat_mae,patch,logistic,adhd200_dx,83,0.046415888336127774,train,0.8328767123287671,0.01918328774753311,0.8279113625648279,0.01996792818427317,0.8246779019356415,0.01999406248268765 +flat_mae,patch,logistic,adhd200_dx,83,0.046415888336127774,test,0.6153846153846154,0.06273678648686291,0.6094688776736361,0.0638707405788139,0.61003861003861,0.0639355704339056 +flat_mae,patch,logistic,adhd200_dx,84,0.000774263682681127,train,0.6876712328767123,0.022585657970404342,0.6706401975683891,0.024240985526051344,0.6694907492214691,0.02329561960076557 +flat_mae,patch,logistic,adhd200_dx,84,0.000774263682681127,test,0.6153846153846154,0.06040181038863696,0.61207925519217,0.06136334420268851,0.6143822393822393,0.06180415776391121 +flat_mae,patch,logistic,adhd200_dx,85,0.000774263682681127,train,0.6821917808219178,0.023139862267220142,0.6621017684990105,0.02552889641824621,0.6617665018013067,0.024121186087777198 +flat_mae,patch,logistic,adhd200_dx,85,0.000774263682681127,test,0.6,0.05385423236102494,0.5626293995859213,0.061435677638700885,0.5704633204633205,0.055622556378185764 +flat_mae,patch,logistic,adhd200_dx,86,0.046415888336127774,train,0.8191780821917808,0.019119924342144115,0.812872856077554,0.020122339604502746,0.8089546314953899,0.020085261333795398 +flat_mae,patch,logistic,adhd200_dx,86,0.046415888336127774,test,0.5846153846153846,0.05887179129813265,0.5501153550371699,0.06543573987092743,0.556949806949807,0.06034492166304654 +flat_mae,patch,logistic,adhd200_dx,87,0.046415888336127774,train,0.8356164383561644,0.018860680708484245,0.8305687937117039,0.019707023958580223,0.8271050864016609,0.019796796567559866 +flat_mae,patch,logistic,adhd200_dx,87,0.046415888336127774,test,0.5846153846153846,0.06171822973528003,0.5699583435432491,0.06466889068390361,0.5699806949806949,0.0633015003255363 +flat_mae,patch,logistic,adhd200_dx,88,0.000774263682681127,train,0.6986301369863014,0.023626606843718482,0.6867783810772015,0.024932194098526976,0.684939244061794,0.024450030141254516 +flat_mae,patch,logistic,adhd200_dx,88,0.000774263682681127,test,0.5692307692307692,0.05893200173422218,0.545,0.06342751147989714,0.5477799227799228,0.06035422217292734 +flat_mae,patch,logistic,adhd200_dx,89,0.000774263682681127,train,0.6986301369863014,0.022532127143532404,0.6830196425187898,0.024340032755200403,0.6813518959516395,0.023399732502318138 +flat_mae,patch,logistic,adhd200_dx,89,0.000774263682681127,test,0.6615384615384615,0.05229842451834082,0.6366869918699187,0.05807127810247345,0.6375482625482626,0.05435196774360066 +flat_mae,patch,logistic,adhd200_dx,90,0.005994842503189409,train,0.7726027397260274,0.021425413573178936,0.7639197350477304,0.02271755733742019,0.7605177993527508,0.022477915738060166 +flat_mae,patch,logistic,adhd200_dx,90,0.005994842503189409,test,0.6461538461538462,0.058365354074253833,0.6289401836684041,0.06364232891221606,0.6283783783783784,0.06071089643018875 +flat_mae,patch,logistic,adhd200_dx,91,0.046415888336127774,train,0.8575342465753425,0.018370041779441328,0.8539730411768327,0.01896553080118171,0.8515448494840325,0.019116424454876043 +flat_mae,patch,logistic,adhd200_dx,91,0.046415888336127774,test,0.5384615384615384,0.05467313090178346,0.5125,0.05817411414308972,0.5164092664092664,0.05537289452171042 +flat_mae,patch,logistic,adhd200_dx,92,0.005994842503189409,train,0.7506849315068493,0.020639049176503128,0.74059090447591,0.021915645770071168,0.737512975514441,0.02152060987961763 +flat_mae,patch,logistic,adhd200_dx,92,0.005994842503189409,test,0.6153846153846154,0.05465672108499845,0.5905769715293525,0.05998352883650437,0.5926640926640927,0.056329223481131084 +flat_mae,patch,logistic,adhd200_dx,93,0.005994842503189409,train,0.7561643835616438,0.02157270503557849,0.7462922032786373,0.022825910576414116,0.7430848140685107,0.02245759526190402 +flat_mae,patch,logistic,adhd200_dx,93,0.005994842503189409,test,0.6,0.048062090019488544,0.5427489177489178,0.05885037218273165,0.5617760617760618,0.049888139386200236 +flat_mae,patch,logistic,adhd200_dx,94,0.005994842503189409,train,0.7397260273972602,0.02188914838541792,0.7303675710142223,0.022984216370794272,0.7278042376503633,0.022667099903711435 +flat_mae,patch,logistic,adhd200_dx,94,0.005994842503189409,test,0.676923076923077,0.05151375756955526,0.6431372549019607,0.06135123667299099,0.6467181467181468,0.05472765618837617 +flat_mae,patch,logistic,adhd200_dx,95,0.000774263682681127,train,0.6986301369863014,0.02488720073386717,0.6893761218047905,0.025855162290165698,0.6878091225499176,0.025580141301761308 +flat_mae,patch,logistic,adhd200_dx,95,0.000774263682681127,test,0.5384615384615384,0.06190759290403397,0.5192307692307693,0.06543764537688596,0.5207528957528957,0.06332721930480743 +flat_mae,patch,logistic,adhd200_dx,96,0.005994842503189409,train,0.7479452054794521,0.022703175113961645,0.7411826365766432,0.023381532899840386,0.7393906087806069,0.023257137914097906 +flat_mae,patch,logistic,adhd200_dx,96,0.005994842503189409,test,0.6461538461538462,0.0602544427392909,0.6431129147767964,0.060720651457537377,0.6457528957528957,0.060999149259355095 +flat_mae,patch,logistic,adhd200_dx,97,0.005994842503189409,train,0.7780821917808219,0.021283615911043573,0.7685696169833032,0.0226973017780711,0.7646546986627587,0.02229178197844689 +flat_mae,patch,logistic,adhd200_dx,97,0.005994842503189409,test,0.6153846153846154,0.05090052951910661,0.5656241646618552,0.06264535151441536,0.5796332046332047,0.053371860799706405 +flat_mae,patch,logistic,adhd200_dx,98,0.005994842503189409,train,0.7397260273972602,0.022053399768711488,0.729787648548607,0.023354671805246387,0.7270867680283324,0.022997980591105933 +flat_mae,patch,logistic,adhd200_dx,98,0.005994842503189409,test,0.6923076923076923,0.0566086618082072,0.6697154471544715,0.06270357711228097,0.6689189189189189,0.0587243208373348 +flat_mae,patch,logistic,adhd200_dx,99,0.005994842503189409,train,0.7671232876712328,0.023014702522135673,0.7602043576723012,0.02410223543389857,0.7578158392868046,0.024069057153170294 +flat_mae,patch,logistic,adhd200_dx,99,0.005994842503189409,test,0.5692307692307692,0.05758990707102855,0.5608108108108107,0.058365391499428834,0.5608108108108107,0.05808956055244975 +flat_mae,patch,logistic,adhd200_dx,100,0.3593813663804626,train,0.9643835616438357,0.009548248260751279,0.9636456968610415,0.009794317309887304,0.9619893753434695,0.01030868453289804 +flat_mae,patch,logistic,adhd200_dx,100,0.3593813663804626,test,0.6153846153846154,0.06101949635084874,0.6094688776736361,0.06163909468077069,0.61003861003861,0.06159971511889918 diff --git a/data_scaling/n1600_1/eval_v2/adhd200_dx__patch__logistic/log.txt b/data_scaling/n1600_1/eval_v2/adhd200_dx__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..d80713d15a0c9f310d6e1e88b015d427c495222c --- /dev/null +++ b/data_scaling/n1600_1/eval_v2/adhd200_dx__patch__logistic/log.txt @@ -0,0 +1,241 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: 58906bf7243fb545e1349221e6921a1797e2e666, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-26 17:20:26 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n1600_1; eval v2 (adhd200_dx patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n1600_1/eval_v2/adhd200_dx__patch__logistic +model: flat_mae +representation: patch +dataset: adhd200_dx +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/adhd200_dx__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: adhd200_dx (flat) +train (n=301): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'gender', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 301 +}), + labels=['ADHD' 'Control'], + counts=[131 170] +) + +validation (n=64): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'gender', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 64 +}), + labels=['ADHD' 'Control'], + counts=[28 36] +) + +test (n=65): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'gender', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 65 +}), + labels=['ADHD' 'Control'], + counts=[28 37] +) + +extracting features for all splits +extract (train) [ 0/151] eta: 0:09:29 time: 3.7683 data: 2.9924 max mem: 2698 +extract (train) [ 20/151] eta: 0:00:45 time: 0.1764 data: 0.0500 max mem: 2851 +extract (train) [ 40/151] eta: 0:00:28 time: 0.1602 data: 0.0409 max mem: 2851 +extract (train) [ 60/151] eta: 0:00:20 time: 0.1550 data: 0.0403 max mem: 2851 +extract (train) [ 80/151] eta: 0:00:14 time: 0.1488 data: 0.0384 max mem: 2851 +extract (train) [100/151] eta: 0:00:09 time: 0.1544 data: 0.0420 max mem: 2851 +extract (train) [120/151] eta: 0:00:05 time: 0.1550 data: 0.0403 max mem: 2851 +extract (train) [140/151] eta: 0:00:01 time: 0.1365 data: 0.0325 max mem: 2851 +extract (train) [150/151] eta: 0:00:00 time: 0.1357 data: 0.0327 max mem: 2851 +extract (train) Total time: 0:00:27 (0.1798 s / it) +extract (validation) [ 0/32] eta: 0:01:58 time: 3.7075 data: 3.5771 max mem: 2851 +extract (validation) [20/32] eta: 0:00:04 time: 0.1658 data: 0.0451 max mem: 2851 +extract (validation) [31/32] eta: 0:00:00 time: 0.1288 data: 0.0304 max mem: 2851 +extract (validation) Total time: 0:00:08 (0.2717 s / it) +extract (test) [ 0/33] eta: 0:02:01 time: 3.6960 data: 3.4967 max mem: 2851 +extract (test) [20/33] eta: 0:00:04 time: 0.1699 data: 0.0438 max mem: 2851 +extract (test) [32/33] eta: 0:00:00 time: 0.1318 data: 0.0317 max mem: 2851 +extract (test) Total time: 0:00:08 (0.2697 s / it) +feature extraction time: 0:00:44 +train features: (301, 768) +validation features: (64, 768) +test features: (65, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:-----------|:--------|----------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | adhd200_dx | | 0.0059948 | train | 0.7589 | 0.02163 | 0.7505 | 0.02265 | 0.74766 | 0.022401 | +| flat_mae | patch | logistic | adhd200_dx | | 0.0059948 | test | 0.63077 | 0.058589 | 0.60366 | 0.065902 | 0.60618 | 0.061262 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 1, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.05687315898574434, "f1": 0.5775, "f1_std": 0.060290791754569605, "bacc": 0.5791505791505791, "bacc_std": 0.057933054764911535} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 2, "C": 0.046415888336127774, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05578402138085886, "f1": 0.6289401836684041, "f1_std": 0.05968094560415457, "bacc": 0.6283783783783784, "bacc_std": 0.05752966114564144} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 3, "C": 0.000774263682681127, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05977697405539139, "f1": 0.5512820512820513, "f1_std": 0.062463142528234775, "bacc": 0.5521235521235521, "bacc_std": 0.06089063844054056} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 4, "C": 0.046415888336127774, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.06090439496000094, "f1": 0.6407113674597452, "f1_std": 0.06188663055524596, "bacc": 0.6414092664092663, "bacc_std": 0.06189718308359479} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 5, "C": 0.005994842503189409, "split": "test", "acc": 0.5076923076923077, "acc_std": 0.06250698611842893, "f1": 0.5066413662239089, "f1_std": 0.06284448377491744, "bacc": 0.5111003861003861, "bacc_std": 0.06397993758826467} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 6, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05822523091231928, "f1": 0.6336682185738789, "f1_std": 0.060916278468128957, "bacc": 0.6327220077220077, "bacc_std": 0.05971745127252575} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 7, "C": 0.046415888336127774, "split": "test", "acc": 0.6, "acc_std": 0.05940116150519012, "f1": 0.5921814671814671, "f1_std": 0.06059235440050431, "bacc": 0.5921814671814671, "bacc_std": 0.060227089811499164} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 8, "C": 0.005994842503189409, "split": "test", "acc": 0.7076923076923077, "acc_std": 0.05772658673928373, "f1": 0.6934723256391164, "f1_std": 0.06203714844494515, "bacc": 0.6911196911196911, "bacc_std": 0.06015470888144337} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 9, "C": 0.046415888336127774, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.06393099830581951, "f1": 0.564176245210728, "f1_std": 0.06450444271486197, "bacc": 0.5651544401544402, "bacc_std": 0.06476597677360901} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 10, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.05866893576390013, "f1": 0.5833333333333333, "f1_std": 0.06148898240968189, "bacc": 0.5834942084942085, "bacc_std": 0.05980521627926769} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 11, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.057128204207521974, "f1": 0.5699583435432491, "f1_std": 0.059513438420802985, "bacc": 0.5699806949806949, "bacc_std": 0.058284035366820204} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 12, "C": 0.046415888336127774, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.058593300907549176, "f1": 0.5512820512820513, "f1_std": 0.061481823260508384, "bacc": 0.5521235521235521, "bacc_std": 0.05952379381804377} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 13, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.05620752755500062, "f1": 0.6575670498084292, "f1_std": 0.05687973184477358, "bacc": 0.6592664092664093, "bacc_std": 0.05712165929996483} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 14, "C": 166.81005372000556, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.062257484044302876, "f1": 0.5521501544309813, "f1_std": 0.062437789287548354, "bacc": 0.555984555984556, "bacc_std": 0.06253361281411537} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 15, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.06079342069665104, "f1": 0.5608108108108107, "f1_std": 0.06159096857721931, "bacc": 0.5608108108108107, "bacc_std": 0.061467094643038345} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 16, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.061070528632521444, "f1": 0.6235521235521235, "f1_std": 0.06221801201152397, "bacc": 0.6235521235521235, "bacc_std": 0.06208149045894029} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 17, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.05686653324715949, "f1": 0.570630081300813, "f1_std": 0.06270939294419887, "bacc": 0.5748069498069498, "bacc_std": 0.058517693114287214} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 18, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05622191027763351, "f1": 0.6233308138070043, "f1_std": 0.06167850873617281, "bacc": 0.6240347490347491, "bacc_std": 0.05819478120036686} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 19, "C": 0.046415888336127774, "split": "test", "acc": 0.676923076923077, "acc_std": 0.05866280335136926, "f1": 0.6719538572458543, "f1_std": 0.060002484258066535, "bacc": 0.6727799227799228, "bacc_std": 0.060091977733538006} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 20, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05351320359107538, "f1": 0.6336682185738789, "f1_std": 0.05682691420110582, "bacc": 0.6327220077220077, "bacc_std": 0.055511956321264475} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 21, "C": 0.005994842503189409, "split": "test", "acc": 0.6923076923076923, "acc_std": 0.06106820321400477, "f1": 0.6904761904761905, "f1_std": 0.06137632898779881, "bacc": 0.6949806949806949, "bacc_std": 0.061370674064897296} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 22, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.06122946268416632, "f1": 0.6235521235521235, "f1_std": 0.06302460190297848, "bacc": 0.6235521235521235, "bacc_std": 0.06303917241435883} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 23, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.055204699225410575, "f1": 0.5834401435529352, "f1_std": 0.06247001238906739, "bacc": 0.5883204633204633, "bacc_std": 0.05727929666139208} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 24, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05563814620931819, "f1": 0.5834401435529352, "f1_std": 0.0622450777161757, "bacc": 0.5883204633204633, "bacc_std": 0.057215029905661484} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 25, "C": 0.000774263682681127, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.057996620999307184, "f1": 0.6233308138070043, "f1_std": 0.06403036874752153, "bacc": 0.6240347490347491, "bacc_std": 0.06045991437518356} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 26, "C": 0.046415888336127774, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05811573028456991, "f1": 0.5565302144249512, "f1_std": 0.060251558916601756, "bacc": 0.5564671814671815, "bacc_std": 0.05940637368331731} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 27, "C": 0.000774263682681127, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05703211178930629, "f1": 0.5966741126830479, "f1_std": 0.0608086801229079, "bacc": 0.597007722007722, "bacc_std": 0.058438101460180844} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 28, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05356444108839768, "f1": 0.5905769715293525, "f1_std": 0.05807956011402474, "bacc": 0.5926640926640927, "bacc_std": 0.05509674327527941} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 29, "C": 0.000774263682681127, "split": "test", "acc": 0.6, "acc_std": 0.05994657187270963, "f1": 0.5775, "f1_std": 0.06495994457602072, "bacc": 0.5791505791505791, "bacc_std": 0.06171941583500162} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 30, "C": 0.046415888336127774, "split": "test", "acc": 0.6, "acc_std": 0.06156106507954367, "f1": 0.5953065134099617, "f1_std": 0.062024495704855576, "bacc": 0.5965250965250966, "bacc_std": 0.06207337116376116} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 31, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.06090936908722295, "f1": 0.6018132810585641, "f1_std": 0.06398252282586593, "bacc": 0.6013513513513513, "bacc_std": 0.06267481158636691} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 32, "C": 0.046415888336127774, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.0628158122783535, "f1": 0.5469838981014179, "f1_std": 0.06394245999381412, "bacc": 0.5472972972972974, "bacc_std": 0.06391276428040849} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 33, "C": 21.54434690031882, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06125531784667386, "f1": 0.5383522727272727, "f1_std": 0.06167709416969898, "bacc": 0.5511583011583012, "bacc_std": 0.06157844916591004} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 34, "C": 0.046415888336127774, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.060463495760058626, "f1": 0.6139225469232596, "f1_std": 0.0608400465042012, "bacc": 0.6187258687258688, "bacc_std": 0.06101202760295734} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 35, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.06095454509753426, "f1": 0.5512820512820513, "f1_std": 0.06360584676706754, "bacc": 0.5521235521235521, "bacc_std": 0.061909132881585074} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 36, "C": 0.046415888336127774, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.059029299715074306, "f1": 0.6198830409356726, "f1_std": 0.06151590602643233, "bacc": 0.6192084942084942, "bacc_std": 0.06038237553808238} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 37, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05391319782213149, "f1": 0.6036585365853658, "f1_std": 0.060416268683395516, "bacc": 0.6061776061776062, "bacc_std": 0.05594752349950534} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 38, "C": 0.000774263682681127, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.05838436201424702, "f1": 0.5192307692307693, "f1_std": 0.06038449040782191, "bacc": 0.5207528957528957, "bacc_std": 0.058702405925690114} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 39, "C": 0.000774263682681127, "split": "test", "acc": 0.7076923076923077, "acc_std": 0.05314475932570381, "f1": 0.6834145091002307, "f1_std": 0.06073000792755282, "bacc": 0.6824324324324325, "bacc_std": 0.056236247529993295} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 40, "C": 0.046415888336127774, "split": "test", "acc": 0.6923076923076923, "acc_std": 0.05805947572871711, "f1": 0.675, "f1_std": 0.06312847423068553, "bacc": 0.6732625482625483, "bacc_std": 0.06047613315133476} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 41, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05950246576363591, "f1": 0.6198830409356726, "f1_std": 0.06152396985632353, "bacc": 0.6192084942084942, "bacc_std": 0.060583424993649715} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 42, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.06428486897161902, "f1": 0.5921814671814671, "f1_std": 0.06535737067630322, "bacc": 0.5921814671814671, "bacc_std": 0.06494250595398127} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 43, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.06062655890754822, "f1": 0.5833333333333333, "f1_std": 0.06277166102990825, "bacc": 0.5834942084942085, "bacc_std": 0.0613045790725833} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 44, "C": 0.046415888336127774, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.055332103310869483, "f1": 0.6289401836684041, "f1_std": 0.05926238341237621, "bacc": 0.6283783783783784, "bacc_std": 0.05714172376882657} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 45, "C": 0.005994842503189409, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.05798623791023391, "f1": 0.5125, "f1_std": 0.061062661875850015, "bacc": 0.5164092664092664, "bacc_std": 0.05854938121882263} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 46, "C": 0.000774263682681127, "split": "test", "acc": 0.676923076923077, "acc_std": 0.054547337891100764, "f1": 0.656084656084656, "f1_std": 0.059836480438581294, "bacc": 0.6554054054054055, "bacc_std": 0.05645306785801666} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 47, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.06073737042118694, "f1": 0.5699583435432491, "f1_std": 0.0628849453603333, "bacc": 0.5699806949806949, "bacc_std": 0.06167383296840495} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 48, "C": 0.005994842503189409, "split": "test", "acc": 0.7846153846153846, "acc_std": 0.04928819727411974, "f1": 0.7804054054054055, "f1_std": 0.05067015258726057, "bacc": 0.7804054054054055, "bacc_std": 0.050877748827951805} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 49, "C": 0.005994842503189409, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.06108239463962789, "f1": 0.5321419707123356, "f1_std": 0.06378864853669464, "bacc": 0.5342664092664092, "bacc_std": 0.06179659742532306} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 50, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.05332427536493362, "f1": 0.6366869918699187, "f1_std": 0.059724289881772565, "bacc": 0.6375482625482626, "bacc_std": 0.055570078181126525} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 51, "C": 0.046415888336127774, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06055287679023982, "f1": 0.5357142857142857, "f1_std": 0.06099381124512972, "bacc": 0.5381274131274132, "bacc_std": 0.061617172930203114} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 52, "C": 0.046415888336127774, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05943018578339227, "f1": 0.6336682185738789, "f1_std": 0.06303502621643099, "bacc": 0.6327220077220077, "bacc_std": 0.061809551996838} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 53, "C": 0.000774263682681127, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.06013264233805307, "f1": 0.5376016260162602, "f1_std": 0.06431226753516604, "bacc": 0.5434362934362934, "bacc_std": 0.060802090322816346} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 54, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.06102415081136747, "f1": 0.5833333333333333, "f1_std": 0.06416582666963139, "bacc": 0.5834942084942085, "bacc_std": 0.062406250972183025} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 55, "C": 0.005994842503189409, "split": "test", "acc": 0.5076923076923077, "acc_std": 0.05939194056057012, "f1": 0.5047619047619047, "f1_std": 0.059576580006898625, "bacc": 0.5067567567567568, "bacc_std": 0.059868340562345655} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 56, "C": 0.000774263682681127, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.061128255893973076, "f1": 0.606060606060606, "f1_std": 0.06283552272328789, "bacc": 0.6056949806949807, "bacc_std": 0.062374944610876976} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 57, "C": 0.005994842503189409, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.05544881748916513, "f1": 0.5167905665214048, "f1_std": 0.06014532054167831, "bacc": 0.5255791505791506, "bacc_std": 0.05613130894715788} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 58, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.05874986390516781, "f1": 0.5810455956075435, "f1_std": 0.059372108326704684, "bacc": 0.583011583011583, "bacc_std": 0.059724793915893436} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 59, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.059759803236291174, "f1": 0.5644080416976918, "f1_std": 0.0632717469472277, "bacc": 0.5656370656370656, "bacc_std": 0.06100607673386791} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 60, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05481004529362075, "f1": 0.6233308138070043, "f1_std": 0.05981157069366302, "bacc": 0.6240347490347491, "bacc_std": 0.05670678430018822} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 61, "C": 0.046415888336127774, "split": "test", "acc": 0.6, "acc_std": 0.05782605215378539, "f1": 0.570630081300813, "f1_std": 0.06462025082173173, "bacc": 0.5748069498069498, "bacc_std": 0.06019585099642914} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 62, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.057828303301427526, "f1": 0.6094688776736361, "f1_std": 0.058812633461607276, "bacc": 0.61003861003861, "bacc_std": 0.0589813180290202} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 63, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.054387050320858096, "f1": 0.6036585365853658, "f1_std": 0.05969664861585038, "bacc": 0.6061776061776062, "bacc_std": 0.05601578895121886} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 64, "C": 0.000774263682681127, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05765306562821583, "f1": 0.6289401836684041, "f1_std": 0.061598285722530025, "bacc": 0.6283783783783784, "bacc_std": 0.0592772184233897} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 65, "C": 0.005994842503189409, "split": "test", "acc": 0.6923076923076923, "acc_std": 0.05498508888780666, "f1": 0.675, "f1_std": 0.06005182991021162, "bacc": 0.6732625482625483, "bacc_std": 0.05740138948852155} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 66, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.0627080471040012, "f1": 0.588206627680312, "f1_std": 0.06487930173782207, "bacc": 0.5878378378378378, "bacc_std": 0.06403553819602813} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 67, "C": 0.005994842503189409, "split": "test", "acc": 0.676923076923077, "acc_std": 0.05392555903428136, "f1": 0.6431372549019607, "f1_std": 0.06400365901103103, "bacc": 0.6467181467181468, "bacc_std": 0.05707649481217139} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 68, "C": 0.005994842503189409, "split": "test", "acc": 0.6923076923076923, "acc_std": 0.053392941048851714, "f1": 0.675, "f1_std": 0.05794569711605071, "bacc": 0.6732625482625483, "bacc_std": 0.05556853055500761} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 69, "C": 0.000774263682681127, "split": "test", "acc": 0.6, "acc_std": 0.06039970218195318, "f1": 0.5833333333333333, "f1_std": 0.06309116516474104, "bacc": 0.5834942084942085, "bacc_std": 0.06120905099252913} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 70, "C": 0.005994842503189409, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.06379998330581665, "f1": 0.543030303030303, "f1_std": 0.06592324327788525, "bacc": 0.542953667953668, "bacc_std": 0.0652426748177374} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 71, "C": 0.005994842503189409, "split": "test", "acc": 0.7076923076923077, "acc_std": 0.058899059174129714, "f1": 0.7031963470319634, "f1_std": 0.05994551473859335, "bacc": 0.7041505791505791, "bacc_std": 0.0598625565211947} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 72, "C": 0.046415888336127774, "split": "test", "acc": 0.5230769230769231, "acc_std": 0.06119439197726745, "f1": 0.5115151515151515, "f1_std": 0.06211723943492022, "bacc": 0.5115830115830116, "bacc_std": 0.0616030794438102} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 73, "C": 0.000774263682681127, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.06186528598064876, "f1": 0.5644080416976918, "f1_std": 0.06583988908469038, "bacc": 0.5656370656370656, "bacc_std": 0.06348978168546354} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 74, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.059392586152994115, "f1": 0.6375757575757576, "f1_std": 0.06104786705755178, "bacc": 0.6370656370656371, "bacc_std": 0.060377216800809694} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 75, "C": 0.046415888336127774, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.062172266881872405, "f1": 0.5608108108108107, "f1_std": 0.06324153335642566, "bacc": 0.5608108108108107, "bacc_std": 0.0631161648852578} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 76, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.04725853355831982, "f1": 0.5305555555555556, "f1_std": 0.062246460803050754, "bacc": 0.5574324324324325, "bacc_std": 0.04961748378615116} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 77, "C": 0.046415888336127774, "split": "test", "acc": 0.7076923076923077, "acc_std": 0.05663140241637118, "f1": 0.7006060606060607, "f1_std": 0.05853708579531545, "bacc": 0.6998069498069499, "bacc_std": 0.058041765351309955} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 78, "C": 0.046415888336127774, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.057827787591706106, "f1": 0.5250692869740489, "f1_std": 0.061884788921286356, "bacc": 0.5299227799227799, "bacc_std": 0.058602122095466114} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 79, "C": 0.000774263682681127, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.06097026144822646, "f1": 0.5608108108108107, "f1_std": 0.06235617863777744, "bacc": 0.5608108108108107, "bacc_std": 0.06192628136374481} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 80, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.061300308405838284, "f1": 0.6289401836684041, "f1_std": 0.06559541599195283, "bacc": 0.6283783783783784, "bacc_std": 0.06340944381201295} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 81, "C": 0.3593813663804626, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05852624758862244, "f1": 0.5512820512820513, "f1_std": 0.06128784625556384, "bacc": 0.5521235521235521, "bacc_std": 0.05952112491424185} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 82, "C": 0.005994842503189409, "split": "test", "acc": 0.7384615384615385, "acc_std": 0.05421625774203063, "f1": 0.7362138935306756, "f1_std": 0.05485036841489733, "bacc": 0.7398648648648649, "bacc_std": 0.05504295477455313} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 83, "C": 0.046415888336127774, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.06273678648686291, "f1": 0.6094688776736361, "f1_std": 0.0638707405788139, "bacc": 0.61003861003861, "bacc_std": 0.0639355704339056} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 84, "C": 0.000774263682681127, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.06040181038863696, "f1": 0.61207925519217, "f1_std": 0.06136334420268851, "bacc": 0.6143822393822393, "bacc_std": 0.06180415776391121} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 85, "C": 0.000774263682681127, "split": "test", "acc": 0.6, "acc_std": 0.05385423236102494, "f1": 0.5626293995859213, "f1_std": 0.061435677638700885, "bacc": 0.5704633204633205, "bacc_std": 0.055622556378185764} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 86, "C": 0.046415888336127774, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.05887179129813265, "f1": 0.5501153550371699, "f1_std": 0.06543573987092743, "bacc": 0.556949806949807, "bacc_std": 0.06034492166304654} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 87, "C": 0.046415888336127774, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.06171822973528003, "f1": 0.5699583435432491, "f1_std": 0.06466889068390361, "bacc": 0.5699806949806949, "bacc_std": 0.0633015003255363} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 88, "C": 0.000774263682681127, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05893200173422218, "f1": 0.545, "f1_std": 0.06342751147989714, "bacc": 0.5477799227799228, "bacc_std": 0.06035422217292734} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 89, "C": 0.000774263682681127, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.05229842451834082, "f1": 0.6366869918699187, "f1_std": 0.05807127810247345, "bacc": 0.6375482625482626, "bacc_std": 0.05435196774360066} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 90, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.058365354074253833, "f1": 0.6289401836684041, "f1_std": 0.06364232891221606, "bacc": 0.6283783783783784, "bacc_std": 0.06071089643018875} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 91, "C": 0.046415888336127774, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.05467313090178346, "f1": 0.5125, "f1_std": 0.05817411414308972, "bacc": 0.5164092664092664, "bacc_std": 0.05537289452171042} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 92, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05465672108499845, "f1": 0.5905769715293525, "f1_std": 0.05998352883650437, "bacc": 0.5926640926640927, "bacc_std": 0.056329223481131084} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 93, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.048062090019488544, "f1": 0.5427489177489178, "f1_std": 0.05885037218273165, "bacc": 0.5617760617760618, "bacc_std": 0.049888139386200236} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 94, "C": 0.005994842503189409, "split": "test", "acc": 0.676923076923077, "acc_std": 0.05151375756955526, "f1": 0.6431372549019607, "f1_std": 0.06135123667299099, "bacc": 0.6467181467181468, "bacc_std": 0.05472765618837617} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 95, "C": 0.000774263682681127, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06190759290403397, "f1": 0.5192307692307693, "f1_std": 0.06543764537688596, "bacc": 0.5207528957528957, "bacc_std": 0.06332721930480743} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 96, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.0602544427392909, "f1": 0.6431129147767964, "f1_std": 0.060720651457537377, "bacc": 0.6457528957528957, "bacc_std": 0.060999149259355095} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 97, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05090052951910661, "f1": 0.5656241646618552, "f1_std": 0.06264535151441536, "bacc": 0.5796332046332047, "bacc_std": 0.053371860799706405} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 98, "C": 0.005994842503189409, "split": "test", "acc": 0.6923076923076923, "acc_std": 0.0566086618082072, "f1": 0.6697154471544715, "f1_std": 0.06270357711228097, "bacc": 0.6689189189189189, "bacc_std": 0.0587243208373348} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 99, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05758990707102855, "f1": 0.5608108108108107, "f1_std": 0.058365391499428834, "bacc": 0.5608108108108107, "bacc_std": 0.05808956055244975} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 100, "C": 0.3593813663804626, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.06101949635084874, "f1": 0.6094688776736361, "f1_std": 0.06163909468077069, "bacc": 0.61003861003861, "bacc_std": 0.06159971511889918} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:-----------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | adhd200_dx | train | 100 | 1.9052 | 16.796 | 0.77334 | 0.065003 | 0.76427 | 0.069092 | 0.76176 | 0.068906 | +| flat_mae | patch | logistic | adhd200_dx | test | 100 | 1.9052 | 16.796 | 0.61354 | 0.052127 | 0.59663 | 0.053926 | 0.59856 | 0.052574 | + + +done! total time: 0:04:31 diff --git a/data_scaling/n1600_1/eval_v2/adni_ad_vs_cn__patch__logistic/config.yaml b/data_scaling/n1600_1/eval_v2/adni_ad_vs_cn__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ad4b182b9f1c0190bebbcba00c50daf5fa1d1c34 --- /dev/null +++ b/data_scaling/n1600_1/eval_v2/adni_ad_vs_cn__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n1600_1; eval v2 (adni_ad_vs_cn patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n1600_1/eval_v2/adni_ad_vs_cn__patch__logistic +model: flat_mae +representation: patch +dataset: adni_ad_vs_cn +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/adni_ad_vs_cn__patch__logistic +remote_dir: null diff --git a/data_scaling/n1600_1/eval_v2/adni_ad_vs_cn__patch__logistic/eval_table.csv b/data_scaling/n1600_1/eval_v2/adni_ad_vs_cn__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..fece62c9c4d6f8e45033647445f5ee1e6c18e428 --- /dev/null +++ b/data_scaling/n1600_1/eval_v2/adni_ad_vs_cn__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,adni_ad_vs_cn,,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,,2.782559402207126,test,0.6829268292682927,0.07119017103583779,0.6072218128224024,0.0819768697140241,0.6371527777777778,0.09453898326954276 +flat_mae,patch,logistic,adni_ad_vs_cn,1,0.046415888336127774,train,0.8970189701897019,0.014137358141969744,0.8410204081632653,0.02383000049287591,0.8114471197304627,0.026005456528804326 +flat_mae,patch,logistic,adni_ad_vs_cn,1,0.046415888336127774,test,0.8292682926829268,0.044615456569465285,0.7144278606965174,0.09340480274059657,0.6838709677419355,0.08078069230787673 +flat_mae,patch,logistic,adni_ad_vs_cn,2,0.3593813663804626,train,0.978319783197832,0.007761886639423402,0.9689106074648244,0.011442659869560562,0.9575355411290984,0.015471867805818558 +flat_mae,patch,logistic,adni_ad_vs_cn,2,0.3593813663804626,test,0.6585365853658537,0.06538905667538786,0.5370967741935484,0.08187743070866882,0.5370967741935484,0.08222176928546894 +flat_mae,patch,logistic,adni_ad_vs_cn,3,0.005994842503189409,train,0.8319783197831978,0.014361347742713944,0.7033146591970121,0.030641635468584425,0.6719122360095324,0.026091701895980205 +flat_mae,patch,logistic,adni_ad_vs_cn,3,0.005994842503189409,test,0.7073170731707317,0.03283097157905182,0.4142857142857143,0.011405586722440925,0.46774193548387094,0.021710803786147183 +flat_mae,patch,logistic,adni_ad_vs_cn,4,0.046415888336127774,train,0.8943089430894309,0.014420909557716986,0.8360205558277596,0.02445590441046167,0.8056331662420906,0.025856371419812795 +flat_mae,patch,logistic,adni_ad_vs_cn,4,0.046415888336127774,test,0.8780487804878049,0.04911444222889114,0.8144796380090498,0.0856773303969942,0.7838709677419355,0.08576489388982769 +flat_mae,patch,logistic,adni_ad_vs_cn,5,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,5,166.81005372000556,test,0.7560975609756098,0.05299983724846774,0.6117424242424243,0.09074299984464605,0.6016129032258064,0.07851778565326317 +flat_mae,patch,logistic,adni_ad_vs_cn,6,0.3593813663804626,train,0.975609756097561,0.007552179904022587,0.9654641694657917,0.010809304132893703,0.959815925712877,0.013457190657372246 +flat_mae,patch,logistic,adni_ad_vs_cn,6,0.3593813663804626,test,0.7804878048780488,0.055439963294014204,0.6660633484162897,0.09086103084408982,0.6516129032258065,0.0833982256877615 +flat_mae,patch,logistic,adni_ad_vs_cn,7,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,7,21.54434690031882,test,0.7804878048780488,0.06298004934191503,0.7119437939110069,0.0836242294036542,0.7193548387096774,0.08865864872507646 +flat_mae,patch,logistic,adni_ad_vs_cn,8,0.3593813663804626,train,0.986449864498645,0.005689983122340613,0.9806516564069758,0.008339549424029735,0.9709302325581395,0.012206998675254 +flat_mae,patch,logistic,adni_ad_vs_cn,8,0.3593813663804626,test,0.7560975609756098,0.062276847310106964,0.6693548387096775,0.08344388183577806,0.6693548387096775,0.08534050516032682 +flat_mae,patch,logistic,adni_ad_vs_cn,9,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,9,21.54434690031882,test,0.7317073170731707,0.06852453674147349,0.6479313036690086,0.08802308060663055,0.6532258064516129,0.09222527383659629 +flat_mae,patch,logistic,adni_ad_vs_cn,10,0.3593813663804626,train,0.9728997289972899,0.008120688340934412,0.9611382593310305,0.01199552213742945,0.9499548031884295,0.015728711938255358 +flat_mae,patch,logistic,adni_ad_vs_cn,10,0.3593813663804626,test,0.7317073170731707,0.05752977657993554,0.5918552036199095,0.08764680134574954,0.5854838709677419,0.07860463270722981 +flat_mae,patch,logistic,adni_ad_vs_cn,11,0.3593813663804626,train,0.981029810298103,0.006917051001222029,0.9729123189697663,0.010083091150791252,0.9633494946174705,0.013415663004802746 +flat_mae,patch,logistic,adni_ad_vs_cn,11,0.3593813663804626,test,0.7804878048780488,0.04608377649109437,0.6328358208955224,0.08553723647886512,0.6177419354838709,0.07055688187274561 +flat_mae,patch,logistic,adni_ad_vs_cn,12,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,12,2.782559402207126,test,0.7317073170731707,0.06818639813002635,0.6835087719298245,0.07446293809244599,0.7209677419354839,0.08189114491191579 +flat_mae,patch,logistic,adni_ad_vs_cn,13,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,13,21.54434690031882,test,0.7073170731707317,0.06129624964145098,0.5729166666666666,0.0851123418248348,0.5693548387096774,0.07940571900183079 +flat_mae,patch,logistic,adni_ad_vs_cn,14,0.046415888336127774,train,0.8997289972899729,0.013636299487554512,0.8428467833834041,0.0241633357799887,0.8091667351466842,0.02643989518470863 +flat_mae,patch,logistic,adni_ad_vs_cn,14,0.046415888336127774,test,0.8048780487804879,0.055148515075662416,0.7152777777777778,0.08295335683947047,0.7016129032258065,0.08160677931888531 +flat_mae,patch,logistic,adni_ad_vs_cn,15,0.046415888336127774,train,0.8997289972899729,0.014458205640497128,0.8459672597222379,0.024383420766061283,0.8172610732188348,0.02668283172865654 +flat_mae,patch,logistic,adni_ad_vs_cn,15,0.046415888336127774,test,0.7804878048780488,0.04905471698801797,0.6328358208955224,0.0935793641588938,0.6177419354838709,0.07665219800434285 +flat_mae,patch,logistic,adni_ad_vs_cn,16,0.3593813663804626,train,0.9728997289972899,0.008204923806542105,0.9611382593310305,0.012098079811690352,0.9499548031884295,0.01595396518720465 +flat_mae,patch,logistic,adni_ad_vs_cn,16,0.3593813663804626,test,0.8048780487804879,0.05054553080260012,0.6893939393939394,0.09268677112506415,0.667741935483871,0.08232770714141972 +flat_mae,patch,logistic,adni_ad_vs_cn,17,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,17,10000.0,test,0.6341463414634146,0.07240804133335124,0.5467943994104643,0.08136393805526873,0.5548387096774194,0.08832380498220657 +flat_mae,patch,logistic,adni_ad_vs_cn,18,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,18,2.782559402207126,test,0.7560975609756098,0.06014014962293201,0.6440972222222222,0.0909942791935425,0.635483870967742,0.08612247232478888 +flat_mae,patch,logistic,adni_ad_vs_cn,19,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,19,21.54434690031882,test,0.7560975609756098,0.06663950269511851,0.6693548387096775,0.08651149879664517,0.6693548387096775,0.08716104462609528 +flat_mae,patch,logistic,adni_ad_vs_cn,20,0.046415888336127774,train,0.8943089430894309,0.014384071270159778,0.8326335988835263,0.02549009051520209,0.79753882816994,0.02710754202091793 +flat_mae,patch,logistic,adni_ad_vs_cn,20,0.046415888336127774,test,0.7804878048780488,0.061023902827009785,0.6917293233082706,0.0859515648188523,0.685483870967742,0.08622919505065721 +flat_mae,patch,logistic,adni_ad_vs_cn,21,0.3593813663804626,train,0.9728997289972899,0.00838421945712829,0.9614661654135338,0.012066039309020152,0.9540019722245049,0.01481405474193395 +flat_mae,patch,logistic,adni_ad_vs_cn,21,0.3593813663804626,test,0.8048780487804879,0.050141975704424815,0.6893939393939394,0.09304177549058085,0.667741935483871,0.08195253467493364 +flat_mae,patch,logistic,adni_ad_vs_cn,22,0.3593813663804626,train,0.967479674796748,0.009056861914987564,0.9533659111972366,0.013340976090473359,0.9423740652477608,0.01697015188220607 +flat_mae,patch,logistic,adni_ad_vs_cn,22,0.3593813663804626,test,0.8048780487804879,0.05387972151230834,0.7152777777777778,0.0837612798474481,0.7016129032258065,0.08098127376963613 +flat_mae,patch,logistic,adni_ad_vs_cn,23,0.046415888336127774,train,0.8888888888888888,0.013971056831086919,0.8258572464518803,0.024145995596558483,0.7940052592653464,0.025461749994848536 +flat_mae,patch,logistic,adni_ad_vs_cn,23,0.046415888336127774,test,0.8048780487804879,0.044049185639494816,0.6554621848739496,0.0971305937716581,0.6338709677419355,0.07753342161430543 +flat_mae,patch,logistic,adni_ad_vs_cn,24,0.046415888336127774,train,0.8997289972899729,0.013111014764196292,0.8444297580930026,0.022804462313774948,0.8132139041827595,0.025316843284725967 +flat_mae,patch,logistic,adni_ad_vs_cn,24,0.046415888336127774,test,0.7317073170731707,0.06429009210516919,0.6232247284878863,0.08753730109380746,0.6193548387096774,0.08564302141789398 +flat_mae,patch,logistic,adni_ad_vs_cn,25,0.046415888336127774,train,0.9105691056910569,0.013251964908024662,0.8639517345399699,0.02213300881277581,0.8364697181362479,0.025370755352628824 +flat_mae,patch,logistic,adni_ad_vs_cn,25,0.046415888336127774,test,0.7073170731707317,0.06811564159998082,0.6272727272727273,0.08113468788804802,0.6370967741935484,0.08670078553438183 +flat_mae,patch,logistic,adni_ad_vs_cn,26,0.046415888336127774,train,0.8943089430894309,0.014817084013314446,0.8326335988835263,0.026295830092707705,0.79753882816994,0.028024208265088505 +flat_mae,patch,logistic,adni_ad_vs_cn,26,0.046415888336127774,test,0.7560975609756098,0.06170565889992301,0.6440972222222222,0.08828459934581037,0.635483870967742,0.0828625453096422 +flat_mae,patch,logistic,adni_ad_vs_cn,27,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,27,21.54434690031882,test,0.8292682926829268,0.04548567967310384,0.7144278606965174,0.09168480968679629,0.6838709677419355,0.08039465732322407 +flat_mae,patch,logistic,adni_ad_vs_cn,28,0.3593813663804626,train,0.981029810298103,0.006476842307014322,0.9729123189697663,0.00947509278686521,0.9633494946174705,0.01299114569050403 +flat_mae,patch,logistic,adni_ad_vs_cn,28,0.3593813663804626,test,0.8048780487804879,0.05800367175396999,0.7354838709677419,0.07882295407806768,0.7354838709677419,0.08248821843791629 +flat_mae,patch,logistic,adni_ad_vs_cn,29,0.3593813663804626,train,0.967479674796748,0.008341961734817103,0.9525462962962963,0.012706583279675906,0.9342797271756101,0.01690051727445074 +flat_mae,patch,logistic,adni_ad_vs_cn,29,0.3593813663804626,test,0.8048780487804879,0.05232204763433492,0.6893939393939394,0.09536650707830106,0.667741935483871,0.08442171505569435 +flat_mae,patch,logistic,adni_ad_vs_cn,30,0.046415888336127774,train,0.8915989159891599,0.01490641433407598,0.8342913597988144,0.024511106701010574,0.807913550825869,0.02600454293223648 +flat_mae,patch,logistic,adni_ad_vs_cn,30,0.046415888336127774,test,0.7804878048780488,0.053265321993462324,0.6660633484162897,0.09090586944108178,0.6516129032258065,0.08349214017728838 +flat_mae,patch,logistic,adni_ad_vs_cn,31,0.046415888336127774,train,0.8915989159891599,0.014584288248262378,0.8342913597988144,0.024332874393620668,0.807913550825869,0.026611245695182088 +flat_mae,patch,logistic,adni_ad_vs_cn,31,0.046415888336127774,test,0.7317073170731707,0.04866471627730898,0.5512437810945273,0.08473903330474096,0.5516129032258065,0.06804048337373515 +flat_mae,patch,logistic,adni_ad_vs_cn,32,0.046415888336127774,train,0.8970189701897019,0.014554010238374113,0.8425767918088738,0.024133527779968737,0.815494288766538,0.02614678786060549 +flat_mae,patch,logistic,adni_ad_vs_cn,32,0.046415888336127774,test,0.7560975609756098,0.06083838216779572,0.6440972222222222,0.08857092294475118,0.635483870967742,0.0836215300706891 +flat_mae,patch,logistic,adni_ad_vs_cn,33,0.046415888336127774,train,0.9051490514905149,0.013200309166917919,0.8542933537913061,0.02192950977634833,0.8248418111595037,0.024259409714220873 +flat_mae,patch,logistic,adni_ad_vs_cn,33,0.046415888336127774,test,0.7560975609756098,0.060379525428884015,0.6693548387096775,0.08292815339798883,0.6693548387096775,0.0845236155265837 +flat_mae,patch,logistic,adni_ad_vs_cn,34,0.3593813663804626,train,0.989159891598916,0.0051703197895160355,0.9845864661654136,0.007507254208830438,0.9767441860465116,0.011092139548438518 +flat_mae,patch,logistic,adni_ad_vs_cn,34,0.3593813663804626,test,0.7073170731707317,0.06469791607952081,0.6272727272727273,0.07927266970462828,0.6370967741935484,0.08539566331620944 +flat_mae,patch,logistic,adni_ad_vs_cn,35,0.3593813663804626,train,0.9728997289972899,0.00861952056223923,0.9617854183927093,0.012234508785564535,0.9580491412605802,0.014338751479010793 +flat_mae,patch,logistic,adni_ad_vs_cn,35,0.3593813663804626,test,0.7804878048780488,0.0470667504783885,0.6328358208955224,0.09430909214066663,0.6177419354838709,0.07652525376697483 +flat_mae,patch,logistic,adni_ad_vs_cn,36,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,36,166.81005372000556,test,0.5853658536585366,0.07274626475832228,0.5108771929824562,0.07636041931797441,0.5225806451612903,0.08689702177485871 +flat_mae,patch,logistic,adni_ad_vs_cn,37,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,37,21.54434690031882,test,0.6585365853658537,0.0657965244476277,0.5370967741935484,0.08200837575522467,0.5370967741935484,0.08217345427136831 +flat_mae,patch,logistic,adni_ad_vs_cn,38,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,38,2.782559402207126,test,0.6829268292682927,0.07070552921792547,0.6259649122807017,0.07808984477597361,0.6548387096774193,0.08795612589948819 +flat_mae,patch,logistic,adni_ad_vs_cn,39,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,39,2.782559402207126,test,0.7073170731707317,0.06068253470189359,0.5729166666666666,0.08709249823110983,0.5693548387096774,0.08167934175743069 +flat_mae,patch,logistic,adni_ad_vs_cn,40,0.046415888336127774,train,0.8888888888888888,0.014805346082855775,0.8293150715841013,0.02475609171384164,0.8020995973374969,0.02660474777801623 +flat_mae,patch,logistic,adni_ad_vs_cn,40,0.046415888336127774,test,0.7804878048780488,0.03855522858702796,0.5886287625418061,0.09388485894110113,0.5838709677419355,0.0675461314593965 +flat_mae,patch,logistic,adni_ad_vs_cn,41,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,41,166.81005372000556,test,0.6829268292682927,0.07020790066569005,0.6259649122807017,0.07632025330626781,0.6548387096774193,0.08646945386443067 +flat_mae,patch,logistic,adni_ad_vs_cn,42,0.046415888336127774,train,0.9024390243902439,0.014015451113373231,0.850862223818933,0.02300104465400776,0.8230750267072069,0.02503713527680013 +flat_mae,patch,logistic,adni_ad_vs_cn,42,0.046415888336127774,test,0.7317073170731707,0.04919734084503436,0.5512437810945273,0.08384285561479822,0.5516129032258065,0.06814751666095509 +flat_mae,patch,logistic,adni_ad_vs_cn,43,0.3593813663804626,train,0.981029810298103,0.006913457072304303,0.9729123189697663,0.010143963604792078,0.9633494946174705,0.01404954688569246 +flat_mae,patch,logistic,adni_ad_vs_cn,43,0.3593813663804626,test,0.8048780487804879,0.056199779617275535,0.7152777777777778,0.08306519300058633,0.7016129032258065,0.08249847039227791 +flat_mae,patch,logistic,adni_ad_vs_cn,44,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,44,166.81005372000556,test,0.7560975609756098,0.063495183364799,0.6693548387096775,0.0854384560495817,0.6693548387096775,0.08842292722067412 +flat_mae,patch,logistic,adni_ad_vs_cn,45,0.005994842503189409,train,0.8319783197831978,0.013105483712419446,0.695576964019587,0.030835197114391887,0.6638178979373819,0.025652868885398767 +flat_mae,patch,logistic,adni_ad_vs_cn,45,0.005994842503189409,test,0.8048780487804879,0.043103268218459174,0.6554621848739496,0.09649507827195314,0.6338709677419355,0.07570257124037298 +flat_mae,patch,logistic,adni_ad_vs_cn,46,0.3593813663804626,train,0.975609756097561,0.007898079101809004,0.9648738695859115,0.011745772882359038,0.9517215876407263,0.016052359138915884 +flat_mae,patch,logistic,adni_ad_vs_cn,46,0.3593813663804626,test,0.7317073170731707,0.06067822113352215,0.5918552036199095,0.09384681027510482,0.5854838709677419,0.08471789508822196 +flat_mae,patch,logistic,adni_ad_vs_cn,47,0.3593813663804626,train,0.978319783197832,0.007424485127877629,0.9686411149825784,0.011156130257273265,0.9534883720930232,0.01592811053596999 +flat_mae,patch,logistic,adni_ad_vs_cn,47,0.3593813663804626,test,0.8292682926829268,0.051660263910871265,0.7402714932126697,0.08729844578488048,0.717741935483871,0.0834135798090819 +flat_mae,patch,logistic,adni_ad_vs_cn,48,0.046415888336127774,train,0.8997289972899729,0.0145918730996236,0.8459672597222379,0.024465278160098605,0.8172610732188348,0.02693979792451807 +flat_mae,patch,logistic,adni_ad_vs_cn,48,0.046415888336127774,test,0.7317073170731707,0.058252644684430574,0.5918552036199095,0.08801675257101903,0.5854838709677419,0.07852136940128773 +flat_mae,patch,logistic,adni_ad_vs_cn,49,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,49,166.81005372000556,test,0.7804878048780488,0.06118240056750679,0.6917293233082706,0.09101993089519028,0.685483870967742,0.09009956771248741 +flat_mae,patch,logistic,adni_ad_vs_cn,50,0.046415888336127774,train,0.8915989159891599,0.014214869993002826,0.8274410774410774,0.02561959788186076,0.7917248746815679,0.026854814503297648 +flat_mae,patch,logistic,adni_ad_vs_cn,50,0.046415888336127774,test,0.8048780487804879,0.05947737886014956,0.7354838709677419,0.08310040401808406,0.7354838709677419,0.08407905173517825 +flat_mae,patch,logistic,adni_ad_vs_cn,51,0.046415888336127774,train,0.9051490514905149,0.013786694953778721,0.8557063851181499,0.022452159435180002,0.828888980195579,0.024579682840913837 +flat_mae,patch,logistic,adni_ad_vs_cn,51,0.046415888336127774,test,0.6097560975609756,0.06631511313136111,0.47096774193548385,0.07393612944974955,0.47096774193548385,0.0740439627404845 +flat_mae,patch,logistic,adni_ad_vs_cn,52,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,52,2.782559402207126,test,0.6829268292682927,0.06828571393013508,0.5839188134270101,0.08420015577910131,0.5870967741935484,0.08686000523765547 +flat_mae,patch,logistic,adni_ad_vs_cn,53,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,53,2.782559402207126,test,0.7560975609756098,0.06262517981560091,0.6693548387096775,0.08476517664920384,0.6693548387096775,0.08516410619733678 +flat_mae,patch,logistic,adni_ad_vs_cn,54,0.046415888336127774,train,0.9051490514905149,0.01318610297780385,0.8528389603582457,0.022712434183146626,0.8207946421234283,0.025155060874892596 +flat_mae,patch,logistic,adni_ad_vs_cn,54,0.046415888336127774,test,0.7560975609756098,0.062296636427438976,0.6693548387096775,0.08558144513005671,0.6693548387096775,0.08781634747867458 +flat_mae,patch,logistic,adni_ad_vs_cn,55,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,55,1291.5496650148827,test,0.7317073170731707,0.07095534032923356,0.6676492262343405,0.08109318103338878,0.6870967741935483,0.0875407367637042 +flat_mae,patch,logistic,adni_ad_vs_cn,56,0.3593813663804626,train,0.983739837398374,0.0061920151919071355,0.9766829555986183,0.009137407033096534,0.9651162790697674,0.013284032591940283 +flat_mae,patch,logistic,adni_ad_vs_cn,56,0.3593813663804626,test,0.7073170731707317,0.06041695299664794,0.5340909090909092,0.08796595273471947,0.535483870967742,0.07475847089008109 +flat_mae,patch,logistic,adni_ad_vs_cn,57,0.046415888336127774,train,0.9024390243902439,0.013864087579269936,0.8493877551020408,0.023452631018064253,0.8190278576711316,0.025792596293790382 +flat_mae,patch,logistic,adni_ad_vs_cn,57,0.046415888336127774,test,0.7317073170731707,0.05891495599974689,0.5918552036199095,0.09046240751303254,0.5854838709677419,0.07881801217148204 +flat_mae,patch,logistic,adni_ad_vs_cn,58,0.046415888336127774,train,0.8997289972899729,0.01387546155094713,0.8428467833834041,0.024192331105514327,0.8091667351466842,0.026354896096618733 +flat_mae,patch,logistic,adni_ad_vs_cn,58,0.046415888336127774,test,0.7804878048780488,0.05846456952164769,0.6917293233082706,0.08277927396106904,0.685483870967742,0.08311207601799397 +flat_mae,patch,logistic,adni_ad_vs_cn,59,0.046415888336127774,train,0.8943089430894309,0.013892878269013616,0.8360205558277596,0.023734305398910315,0.8056331662420906,0.025685758608824206 +flat_mae,patch,logistic,adni_ad_vs_cn,59,0.046415888336127774,test,0.6829268292682927,0.06332968043151177,0.5547201336675021,0.08528365518430765,0.5532258064516129,0.08317239996865106 +flat_mae,patch,logistic,adni_ad_vs_cn,60,0.046415888336127774,train,0.907859078590786,0.013468871767137462,0.8563215758131013,0.023134655687691713,0.8225614265757252,0.025372028619590967 +flat_mae,patch,logistic,adni_ad_vs_cn,60,0.046415888336127774,test,0.7073170731707317,0.0667399082109583,0.603225806451613,0.08810129387192715,0.603225806451613,0.08752916350622743 +flat_mae,patch,logistic,adni_ad_vs_cn,61,0.3593813663804626,train,0.981029810298103,0.0072817858340105245,0.9731387984733936,0.010462924019675659,0.9673966636535458,0.01335643075127596 +flat_mae,patch,logistic,adni_ad_vs_cn,61,0.3593813663804626,test,0.6829268292682927,0.06842028149379875,0.5839188134270101,0.08374608357767484,0.5870967741935484,0.08589112993710729 +flat_mae,patch,logistic,adni_ad_vs_cn,62,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,62,21.54434690031882,test,0.6341463414634146,0.07282853315101176,0.5467943994104643,0.08142039906497205,0.5548387096774194,0.08930133923041397 +flat_mae,patch,logistic,adni_ad_vs_cn,63,0.046415888336127774,train,0.9051490514905149,0.013562568821468389,0.8528389603582457,0.023275061797495306,0.8207946421234283,0.02563953514548608 +flat_mae,patch,logistic,adni_ad_vs_cn,63,0.046415888336127774,test,0.8292682926829268,0.04391530976647873,0.7144278606965174,0.08845070658018393,0.6838709677419355,0.0764171080430615 +flat_mae,patch,logistic,adni_ad_vs_cn,64,0.046415888336127774,train,0.8943089430894309,0.01471890639941865,0.8360205558277596,0.025253230946817657,0.8056331662420906,0.02717120699701904 +flat_mae,patch,logistic,adni_ad_vs_cn,64,0.046415888336127774,test,0.7560975609756098,0.06632300675829644,0.6693548387096775,0.0886367299935812,0.6693548387096775,0.08978233153104725 +flat_mae,patch,logistic,adni_ad_vs_cn,65,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,65,2.782559402207126,test,0.7317073170731707,0.06956726958150349,0.6479313036690086,0.08813981143556328,0.6532258064516129,0.09149851536640352 +flat_mae,patch,logistic,adni_ad_vs_cn,66,0.046415888336127774,train,0.8943089430894309,0.014118586817537407,0.8360205558277596,0.024041934416828892,0.8056331662420906,0.02596264293541954 +flat_mae,patch,logistic,adni_ad_vs_cn,66,0.046415888336127774,test,0.8048780487804879,0.05762499540287565,0.7354838709677419,0.07844391618614595,0.7354838709677419,0.08011970206964979 +flat_mae,patch,logistic,adni_ad_vs_cn,67,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,67,166.81005372000556,test,0.8048780487804879,0.05948747987391094,0.7515151515151515,0.07432930389051075,0.7693548387096774,0.0796772610681463 +flat_mae,patch,logistic,adni_ad_vs_cn,68,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,68,2.782559402207126,test,0.6341463414634146,0.07154712486704155,0.5858585858585859,0.07514457644915817,0.6225806451612903,0.08901446356770105 +flat_mae,patch,logistic,adni_ad_vs_cn,69,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,69,166.81005372000556,test,0.7560975609756098,0.0631631787555507,0.6693548387096775,0.08661260365606353,0.6693548387096775,0.08860881397847628 +flat_mae,patch,logistic,adni_ad_vs_cn,70,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,70,166.81005372000556,test,0.7804878048780488,0.05063901005824227,0.6328358208955224,0.0916985547020345,0.6177419354838709,0.07604860459039127 +flat_mae,patch,logistic,adni_ad_vs_cn,71,0.046415888336127774,train,0.8970189701897019,0.01346450012861766,0.8410204081632653,0.023122705156186963,0.8114471197304627,0.025608947928773083 +flat_mae,patch,logistic,adni_ad_vs_cn,71,0.046415888336127774,test,0.7317073170731707,0.025273000785603236,0.4225352112676056,0.008585721562303125,0.4838709677419355,0.016712790842092467 +flat_mae,patch,logistic,adni_ad_vs_cn,72,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,72,166.81005372000556,test,0.7073170731707317,0.05495937605233404,0.5340909090909092,0.08226291132227315,0.535483870967742,0.06953163744382226 +flat_mae,patch,logistic,adni_ad_vs_cn,73,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,73,166.81005372000556,test,0.7317073170731707,0.06565532917183121,0.6479313036690086,0.08484545509149703,0.6532258064516129,0.08813551875954169 +flat_mae,patch,logistic,adni_ad_vs_cn,74,0.3593813663804626,train,0.978319783197832,0.007361910201477377,0.9689106074648244,0.010863408328470367,0.9575355411290984,0.014960825815078294 +flat_mae,patch,logistic,adni_ad_vs_cn,74,0.3593813663804626,test,0.7560975609756098,0.05464538211991503,0.6117424242424243,0.09329905780675721,0.6016129032258064,0.08098409319537003 +flat_mae,patch,logistic,adni_ad_vs_cn,75,0.005994842503189409,train,0.8238482384823849,0.014882860891939314,0.6869820554649266,0.031961655011537145,0.6585175445804914,0.02676539481901497 +flat_mae,patch,logistic,adni_ad_vs_cn,75,0.005994842503189409,test,0.7804878048780488,0.02458149408150368,0.5275288092189501,0.0853385069223417,0.55,0.05039206286708255 +flat_mae,patch,logistic,adni_ad_vs_cn,76,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,76,21.54434690031882,test,0.6341463414634146,0.07067112628522604,0.5199063231850116,0.08142168507680905,0.5209677419354839,0.08386952132747816 +flat_mae,patch,logistic,adni_ad_vs_cn,77,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,77,2.782559402207126,test,0.8536585365853658,0.03846971660639475,0.7415966386554622,0.09319016829639222,0.7,0.07886291904310923 +flat_mae,patch,logistic,adni_ad_vs_cn,78,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,78,2.782559402207126,test,0.6341463414634146,0.0736405082120724,0.5467943994104643,0.08266790175095967,0.5548387096774194,0.08867424623806902 +flat_mae,patch,logistic,adni_ad_vs_cn,79,0.3593813663804626,train,0.9701897018970189,0.008909835521929794,0.957433644095347,0.013082654020298269,0.9481880187361328,0.016929182893697457 +flat_mae,patch,logistic,adni_ad_vs_cn,79,0.3593813663804626,test,0.7804878048780488,0.06445665459234878,0.7410526315789474,0.07149786604153614,0.7870967741935484,0.07699930375240986 +flat_mae,patch,logistic,adni_ad_vs_cn,80,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,80,21.54434690031882,test,0.6097560975609756,0.07679908040298142,0.5287356321839081,0.08240818878294155,0.5387096774193548,0.09045223844751711 +flat_mae,patch,logistic,adni_ad_vs_cn,81,0.046415888336127774,train,0.8997289972899729,0.013873495600819439,0.8444297580930026,0.0240078068468606,0.8132139041827595,0.02625984833009404 +flat_mae,patch,logistic,adni_ad_vs_cn,81,0.046415888336127774,test,0.8048780487804879,0.03302103956774291,0.6095238095238095,0.09927304418881826,0.6,0.06769313111387298 +flat_mae,patch,logistic,adni_ad_vs_cn,82,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,82,2.782559402207126,test,0.7073170731707317,0.061994530663944156,0.603225806451613,0.0841189340615236,0.603225806451613,0.08518020415578265 +flat_mae,patch,logistic,adni_ad_vs_cn,83,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,83,166.81005372000556,test,0.6829268292682927,0.0710317605395921,0.6072218128224024,0.07929481985323616,0.6209677419354839,0.08593298764901124 +flat_mae,patch,logistic,adni_ad_vs_cn,84,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,84,166.81005372000556,test,0.7317073170731707,0.0638443042208122,0.6232247284878863,0.08908878955611359,0.6193548387096774,0.0862795960170524 +flat_mae,patch,logistic,adni_ad_vs_cn,85,0.005994842503189409,train,0.8102981029810298,0.013777896114408897,0.6562965722801788,0.029931429138602115,0.6334949461747061,0.024008365963796883 +flat_mae,patch,logistic,adni_ad_vs_cn,85,0.005994842503189409,test,0.9024390243902439,0.03849964268165498,0.8446969696969697,0.07720031285867424,0.8,0.07892426749739272 +flat_mae,patch,logistic,adni_ad_vs_cn,86,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,86,10000.0,test,0.6585365853658537,0.07025616424144066,0.5651515151515152,0.08384720018560972,0.5709677419354839,0.09059275238450024 +flat_mae,patch,logistic,adni_ad_vs_cn,87,0.005994842503189409,train,0.8319783197831978,0.014939992778745389,0.7033146591970121,0.03227852380563545,0.6719122360095324,0.027814683125034767 +flat_mae,patch,logistic,adni_ad_vs_cn,87,0.005994842503189409,test,0.7804878048780488,0.0225593275601093,0.5275288092189501,0.07957919813043343,0.55,0.04624662149822406 +flat_mae,patch,logistic,adni_ad_vs_cn,88,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,88,21.54434690031882,test,0.7073170731707317,0.07048677524777579,0.6272727272727273,0.08430880006797126,0.6370967741935484,0.09023509236356196 +flat_mae,patch,logistic,adni_ad_vs_cn,89,0.046415888336127774,train,0.8970189701897019,0.01338089540597835,0.8377684191040355,0.024038374921056493,0.8033527816583121,0.026117119421895513 +flat_mae,patch,logistic,adni_ad_vs_cn,89,0.046415888336127774,test,0.7804878048780488,0.0598444145762876,0.6917293233082706,0.08678045300301061,0.685483870967742,0.08642448996772303 +flat_mae,patch,logistic,adni_ad_vs_cn,90,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,90,21.54434690031882,test,0.7073170731707317,0.07079600105666778,0.646551724137931,0.07889390833819296,0.6709677419354838,0.0868913146517802 +flat_mae,patch,logistic,adni_ad_vs_cn,91,0.046415888336127774,train,0.8970189701897019,0.01383791001084776,0.8377684191040355,0.024407080568531626,0.8033527816583121,0.026237194047954913 +flat_mae,patch,logistic,adni_ad_vs_cn,91,0.046415888336127774,test,0.7073170731707317,0.04846178818143033,0.4831932773109243,0.0719938283592637,0.5016129032258064,0.05586339617547491 +flat_mae,patch,logistic,adni_ad_vs_cn,92,0.046415888336127774,train,0.8997289972899729,0.013689493674259867,0.8459672597222379,0.02279974032365355,0.8172610732188348,0.024708871064894937 +flat_mae,patch,logistic,adni_ad_vs_cn,92,0.046415888336127774,test,0.7073170731707317,0.056855432059477296,0.5340909090909092,0.08853637640425947,0.535483870967742,0.07447375271807237 +flat_mae,patch,logistic,adni_ad_vs_cn,93,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,93,166.81005372000556,test,0.5853658536585366,0.07923074532877976,0.5465191932335719,0.07900119114548598,0.5903225806451613,0.0940853653494785 +flat_mae,patch,logistic,adni_ad_vs_cn,94,0.046415888336127774,train,0.9186991869918699,0.013111490890309774,0.8757185198491109,0.021729351753897144,0.8458172405292136,0.02463927527681158 +flat_mae,patch,logistic,adni_ad_vs_cn,94,0.046415888336127774,test,0.5853658536585366,0.06559297999321455,0.4177109440267335,0.061108753747717834,0.42096774193548386,0.062271549935115154 +flat_mae,patch,logistic,adni_ad_vs_cn,95,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,95,21.54434690031882,test,0.7073170731707317,0.06674311698209355,0.646551724137931,0.07480127684367027,0.6709677419354838,0.08202326362218301 +flat_mae,patch,logistic,adni_ad_vs_cn,96,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,96,166.81005372000556,test,0.6341463414634146,0.06923137112873086,0.5684210526315789,0.0753653572367766,0.5887096774193548,0.08614099228305212 +flat_mae,patch,logistic,adni_ad_vs_cn,97,0.3593813663804626,train,0.975609756097561,0.007545484417094005,0.9651729815325566,0.01101351258344199,0.9557687566768016,0.014319507362962497 +flat_mae,patch,logistic,adni_ad_vs_cn,97,0.3593813663804626,test,0.7560975609756098,0.06333599251334274,0.6693548387096775,0.0868502634465662,0.6693548387096775,0.08915521401030746 +flat_mae,patch,logistic,adni_ad_vs_cn,98,0.005994842503189409,train,0.8373983739837398,0.013888497317502203,0.7164446721311475,0.029216730725791416,0.6835401429862766,0.025436782333857502 +flat_mae,patch,logistic,adni_ad_vs_cn,98,0.005994842503189409,test,0.7317073170731707,0.057396934503702965,0.5918552036199095,0.08990367053169612,0.5854838709677419,0.08000736666134717 +flat_mae,patch,logistic,adni_ad_vs_cn,99,0.046415888336127774,train,0.8970189701897019,0.01495907858833113,0.8425767918088738,0.024738592863286356,0.815494288766538,0.026782164556596134 +flat_mae,patch,logistic,adni_ad_vs_cn,99,0.046415888336127774,test,0.7804878048780488,0.04633951215404192,0.6328358208955224,0.09129225806517206,0.6177419354838709,0.07412600537069593 +flat_mae,patch,logistic,adni_ad_vs_cn,100,0.046415888336127774,train,0.9132791327913279,0.013805144048092561,0.8674330878390515,0.023053320868157565,0.8382365025885447,0.02587391141744015 +flat_mae,patch,logistic,adni_ad_vs_cn,100,0.046415888336127774,test,0.7073170731707317,0.052819583374581494,0.5340909090909092,0.08066308292807091,0.535483870967742,0.06858881775417705 diff --git a/data_scaling/n1600_1/eval_v2/adni_ad_vs_cn__patch__logistic/log.txt b/data_scaling/n1600_1/eval_v2/adni_ad_vs_cn__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..77c4b8de95ff97a1d042cba9d1f64adf2d1ef4dd --- /dev/null +++ b/data_scaling/n1600_1/eval_v2/adni_ad_vs_cn__patch__logistic/log.txt @@ -0,0 +1,240 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: 58906bf7243fb545e1349221e6921a1797e2e666, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-26 17:20:33 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n1600_1; eval v2 (adni_ad_vs_cn patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n1600_1/eval_v2/adni_ad_vs_cn__patch__logistic +model: flat_mae +representation: patch +dataset: adni_ad_vs_cn +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/adni_ad_vs_cn__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: adni_ad_vs_cn (flat) +train (n=328): +ADNIDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'bold', 'mean', 'std'], + num_rows: 525 +}), + labels=[0 1], + counts=[251 77] +) + +validation (n=41): +ADNIDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'bold', 'mean', 'std'], + num_rows: 66 +}), + labels=[0 1], + counts=[31 10] +) + +test (n=41): +ADNIDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'bold', 'mean', 'std'], + num_rows: 66 +}), + labels=[0 1], + counts=[32 9] +) + +extracting features for all splits +extract (train) [ 0/164] eta: 0:10:43 time: 3.9212 data: 3.1305 max mem: 2698 +extract (train) [ 20/164] eta: 0:00:51 time: 0.1772 data: 0.0521 max mem: 2851 +extract (train) [ 40/164] eta: 0:00:31 time: 0.1505 data: 0.0389 max mem: 2851 +extract (train) [ 60/164] eta: 0:00:23 time: 0.1597 data: 0.0439 max mem: 2851 +extract (train) [ 80/164] eta: 0:00:17 time: 0.1488 data: 0.0391 max mem: 2851 +extract (train) [100/164] eta: 0:00:12 time: 0.1588 data: 0.0435 max mem: 2851 +extract (train) [120/164] eta: 0:00:08 time: 0.1377 data: 0.0336 max mem: 2851 +extract (train) [140/164] eta: 0:00:04 time: 0.1613 data: 0.0435 max mem: 2851 +extract (train) [160/164] eta: 0:00:00 time: 0.1356 data: 0.0327 max mem: 2851 +extract (train) [163/164] eta: 0:00:00 time: 0.1343 data: 0.0323 max mem: 2851 +extract (train) Total time: 0:00:29 (0.1781 s / it) +extract (validation) [ 0/21] eta: 0:01:11 time: 3.4228 data: 3.3173 max mem: 2851 +extract (validation) [20/21] eta: 0:00:00 time: 0.1354 data: 0.0322 max mem: 2851 +extract (validation) Total time: 0:00:06 (0.3043 s / it) +extract (test) [ 0/21] eta: 0:01:11 time: 3.3996 data: 3.2866 max mem: 2851 +extract (test) [20/21] eta: 0:00:00 time: 0.1294 data: 0.0321 max mem: 2851 +extract (test) Total time: 0:00:06 (0.2965 s / it) +feature extraction time: 0:00:41 +train features: (328, 768) +validation features: (41, 768) +test features: (41, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:--------------|:--------|-------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | adni_ad_vs_cn | | 2.7826 | train | 1 | 0 | 1 | 0 | 1 | 0 | +| flat_mae | patch | logistic | adni_ad_vs_cn | | 2.7826 | test | 0.68293 | 0.07119 | 0.60722 | 0.081977 | 0.63715 | 0.094539 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 1, "C": 0.046415888336127774, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.044615456569465285, "f1": 0.7144278606965174, "f1_std": 0.09340480274059657, "bacc": 0.6838709677419355, "bacc_std": 0.08078069230787673} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 2, "C": 0.3593813663804626, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.06538905667538786, "f1": 0.5370967741935484, "f1_std": 0.08187743070866882, "bacc": 0.5370967741935484, "bacc_std": 0.08222176928546894} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 3, "C": 0.005994842503189409, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.03283097157905182, "f1": 0.4142857142857143, "f1_std": 0.011405586722440925, "bacc": 0.46774193548387094, "bacc_std": 0.021710803786147183} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 4, "C": 0.046415888336127774, "split": "test", "acc": 0.8780487804878049, "acc_std": 0.04911444222889114, "f1": 0.8144796380090498, "f1_std": 0.0856773303969942, "bacc": 0.7838709677419355, "bacc_std": 0.08576489388982769} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 5, "C": 166.81005372000556, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.05299983724846774, "f1": 0.6117424242424243, "f1_std": 0.09074299984464605, "bacc": 0.6016129032258064, "bacc_std": 0.07851778565326317} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 6, "C": 0.3593813663804626, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.055439963294014204, "f1": 0.6660633484162897, "f1_std": 0.09086103084408982, "bacc": 0.6516129032258065, "bacc_std": 0.0833982256877615} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 7, "C": 21.54434690031882, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.06298004934191503, "f1": 0.7119437939110069, "f1_std": 0.0836242294036542, "bacc": 0.7193548387096774, "bacc_std": 0.08865864872507646} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 8, "C": 0.3593813663804626, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.062276847310106964, "f1": 0.6693548387096775, "f1_std": 0.08344388183577806, "bacc": 0.6693548387096775, "bacc_std": 0.08534050516032682} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 9, "C": 21.54434690031882, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06852453674147349, "f1": 0.6479313036690086, "f1_std": 0.08802308060663055, "bacc": 0.6532258064516129, "bacc_std": 0.09222527383659629} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 10, "C": 0.3593813663804626, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.05752977657993554, "f1": 0.5918552036199095, "f1_std": 0.08764680134574954, "bacc": 0.5854838709677419, "bacc_std": 0.07860463270722981} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 11, "C": 0.3593813663804626, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.04608377649109437, "f1": 0.6328358208955224, "f1_std": 0.08553723647886512, "bacc": 0.6177419354838709, "bacc_std": 0.07055688187274561} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 12, "C": 2.782559402207126, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06818639813002635, "f1": 0.6835087719298245, "f1_std": 0.07446293809244599, "bacc": 0.7209677419354839, "bacc_std": 0.08189114491191579} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 13, "C": 21.54434690031882, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06129624964145098, "f1": 0.5729166666666666, "f1_std": 0.0851123418248348, "bacc": 0.5693548387096774, "bacc_std": 0.07940571900183079} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 14, "C": 0.046415888336127774, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.055148515075662416, "f1": 0.7152777777777778, "f1_std": 0.08295335683947047, "bacc": 0.7016129032258065, "bacc_std": 0.08160677931888531} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 15, "C": 0.046415888336127774, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.04905471698801797, "f1": 0.6328358208955224, "f1_std": 0.0935793641588938, "bacc": 0.6177419354838709, "bacc_std": 0.07665219800434285} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 16, "C": 0.3593813663804626, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.05054553080260012, "f1": 0.6893939393939394, "f1_std": 0.09268677112506415, "bacc": 0.667741935483871, "bacc_std": 0.08232770714141972} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 17, "C": 10000.0, "split": "test", "acc": 0.6341463414634146, "acc_std": 0.07240804133335124, "f1": 0.5467943994104643, "f1_std": 0.08136393805526873, "bacc": 0.5548387096774194, "bacc_std": 0.08832380498220657} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 18, "C": 2.782559402207126, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06014014962293201, "f1": 0.6440972222222222, "f1_std": 0.0909942791935425, "bacc": 0.635483870967742, "bacc_std": 0.08612247232478888} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 19, "C": 21.54434690031882, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06663950269511851, "f1": 0.6693548387096775, "f1_std": 0.08651149879664517, "bacc": 0.6693548387096775, "bacc_std": 0.08716104462609528} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 20, "C": 0.046415888336127774, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.061023902827009785, "f1": 0.6917293233082706, "f1_std": 0.0859515648188523, "bacc": 0.685483870967742, "bacc_std": 0.08622919505065721} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 21, "C": 0.3593813663804626, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.050141975704424815, "f1": 0.6893939393939394, "f1_std": 0.09304177549058085, "bacc": 0.667741935483871, "bacc_std": 0.08195253467493364} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 22, "C": 0.3593813663804626, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.05387972151230834, "f1": 0.7152777777777778, "f1_std": 0.0837612798474481, "bacc": 0.7016129032258065, "bacc_std": 0.08098127376963613} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 23, "C": 0.046415888336127774, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.044049185639494816, "f1": 0.6554621848739496, "f1_std": 0.0971305937716581, "bacc": 0.6338709677419355, "bacc_std": 0.07753342161430543} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 24, "C": 0.046415888336127774, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06429009210516919, "f1": 0.6232247284878863, "f1_std": 0.08753730109380746, "bacc": 0.6193548387096774, "bacc_std": 0.08564302141789398} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 25, "C": 0.046415888336127774, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06811564159998082, "f1": 0.6272727272727273, "f1_std": 0.08113468788804802, "bacc": 0.6370967741935484, "bacc_std": 0.08670078553438183} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 26, "C": 0.046415888336127774, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06170565889992301, "f1": 0.6440972222222222, "f1_std": 0.08828459934581037, "bacc": 0.635483870967742, "bacc_std": 0.0828625453096422} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 27, "C": 21.54434690031882, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.04548567967310384, "f1": 0.7144278606965174, "f1_std": 0.09168480968679629, "bacc": 0.6838709677419355, "bacc_std": 0.08039465732322407} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 28, "C": 0.3593813663804626, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.05800367175396999, "f1": 0.7354838709677419, "f1_std": 0.07882295407806768, "bacc": 0.7354838709677419, "bacc_std": 0.08248821843791629} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 29, "C": 0.3593813663804626, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.05232204763433492, "f1": 0.6893939393939394, "f1_std": 0.09536650707830106, "bacc": 0.667741935483871, "bacc_std": 0.08442171505569435} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 30, "C": 0.046415888336127774, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.053265321993462324, "f1": 0.6660633484162897, "f1_std": 0.09090586944108178, "bacc": 0.6516129032258065, "bacc_std": 0.08349214017728838} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 31, "C": 0.046415888336127774, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.04866471627730898, "f1": 0.5512437810945273, "f1_std": 0.08473903330474096, "bacc": 0.5516129032258065, "bacc_std": 0.06804048337373515} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 32, "C": 0.046415888336127774, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06083838216779572, "f1": 0.6440972222222222, "f1_std": 0.08857092294475118, "bacc": 0.635483870967742, "bacc_std": 0.0836215300706891} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 33, "C": 0.046415888336127774, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.060379525428884015, "f1": 0.6693548387096775, "f1_std": 0.08292815339798883, "bacc": 0.6693548387096775, "bacc_std": 0.0845236155265837} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 34, "C": 0.3593813663804626, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06469791607952081, "f1": 0.6272727272727273, "f1_std": 0.07927266970462828, "bacc": 0.6370967741935484, "bacc_std": 0.08539566331620944} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 35, "C": 0.3593813663804626, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.0470667504783885, "f1": 0.6328358208955224, "f1_std": 0.09430909214066663, "bacc": 0.6177419354838709, "bacc_std": 0.07652525376697483} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 36, "C": 166.81005372000556, "split": "test", "acc": 0.5853658536585366, "acc_std": 0.07274626475832228, "f1": 0.5108771929824562, "f1_std": 0.07636041931797441, "bacc": 0.5225806451612903, "bacc_std": 0.08689702177485871} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 37, "C": 21.54434690031882, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.0657965244476277, "f1": 0.5370967741935484, "f1_std": 0.08200837575522467, "bacc": 0.5370967741935484, "bacc_std": 0.08217345427136831} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 38, "C": 2.782559402207126, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.07070552921792547, "f1": 0.6259649122807017, "f1_std": 0.07808984477597361, "bacc": 0.6548387096774193, "bacc_std": 0.08795612589948819} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 39, "C": 2.782559402207126, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06068253470189359, "f1": 0.5729166666666666, "f1_std": 0.08709249823110983, "bacc": 0.5693548387096774, "bacc_std": 0.08167934175743069} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 40, "C": 0.046415888336127774, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.03855522858702796, "f1": 0.5886287625418061, "f1_std": 0.09388485894110113, "bacc": 0.5838709677419355, "bacc_std": 0.0675461314593965} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 41, "C": 166.81005372000556, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.07020790066569005, "f1": 0.6259649122807017, "f1_std": 0.07632025330626781, "bacc": 0.6548387096774193, "bacc_std": 0.08646945386443067} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 42, "C": 0.046415888336127774, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.04919734084503436, "f1": 0.5512437810945273, "f1_std": 0.08384285561479822, "bacc": 0.5516129032258065, "bacc_std": 0.06814751666095509} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 43, "C": 0.3593813663804626, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.056199779617275535, "f1": 0.7152777777777778, "f1_std": 0.08306519300058633, "bacc": 0.7016129032258065, "bacc_std": 0.08249847039227791} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 44, "C": 166.81005372000556, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.063495183364799, "f1": 0.6693548387096775, "f1_std": 0.0854384560495817, "bacc": 0.6693548387096775, "bacc_std": 0.08842292722067412} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 45, "C": 0.005994842503189409, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.043103268218459174, "f1": 0.6554621848739496, "f1_std": 0.09649507827195314, "bacc": 0.6338709677419355, "bacc_std": 0.07570257124037298} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 46, "C": 0.3593813663804626, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06067822113352215, "f1": 0.5918552036199095, "f1_std": 0.09384681027510482, "bacc": 0.5854838709677419, "bacc_std": 0.08471789508822196} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 47, "C": 0.3593813663804626, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.051660263910871265, "f1": 0.7402714932126697, "f1_std": 0.08729844578488048, "bacc": 0.717741935483871, "bacc_std": 0.0834135798090819} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 48, "C": 0.046415888336127774, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.058252644684430574, "f1": 0.5918552036199095, "f1_std": 0.08801675257101903, "bacc": 0.5854838709677419, "bacc_std": 0.07852136940128773} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 49, "C": 166.81005372000556, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.06118240056750679, "f1": 0.6917293233082706, "f1_std": 0.09101993089519028, "bacc": 0.685483870967742, "bacc_std": 0.09009956771248741} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 50, "C": 0.046415888336127774, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.05947737886014956, "f1": 0.7354838709677419, "f1_std": 0.08310040401808406, "bacc": 0.7354838709677419, "bacc_std": 0.08407905173517825} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 51, "C": 0.046415888336127774, "split": "test", "acc": 0.6097560975609756, "acc_std": 0.06631511313136111, "f1": 0.47096774193548385, "f1_std": 0.07393612944974955, "bacc": 0.47096774193548385, "bacc_std": 0.0740439627404845} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 52, "C": 2.782559402207126, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06828571393013508, "f1": 0.5839188134270101, "f1_std": 0.08420015577910131, "bacc": 0.5870967741935484, "bacc_std": 0.08686000523765547} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 53, "C": 2.782559402207126, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06262517981560091, "f1": 0.6693548387096775, "f1_std": 0.08476517664920384, "bacc": 0.6693548387096775, "bacc_std": 0.08516410619733678} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 54, "C": 0.046415888336127774, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.062296636427438976, "f1": 0.6693548387096775, "f1_std": 0.08558144513005671, "bacc": 0.6693548387096775, "bacc_std": 0.08781634747867458} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 55, "C": 1291.5496650148827, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.07095534032923356, "f1": 0.6676492262343405, "f1_std": 0.08109318103338878, "bacc": 0.6870967741935483, "bacc_std": 0.0875407367637042} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 56, "C": 0.3593813663804626, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06041695299664794, "f1": 0.5340909090909092, "f1_std": 0.08796595273471947, "bacc": 0.535483870967742, "bacc_std": 0.07475847089008109} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 57, "C": 0.046415888336127774, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.05891495599974689, "f1": 0.5918552036199095, "f1_std": 0.09046240751303254, "bacc": 0.5854838709677419, "bacc_std": 0.07881801217148204} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 58, "C": 0.046415888336127774, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.05846456952164769, "f1": 0.6917293233082706, "f1_std": 0.08277927396106904, "bacc": 0.685483870967742, "bacc_std": 0.08311207601799397} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 59, "C": 0.046415888336127774, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06332968043151177, "f1": 0.5547201336675021, "f1_std": 0.08528365518430765, "bacc": 0.5532258064516129, "bacc_std": 0.08317239996865106} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 60, "C": 0.046415888336127774, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.0667399082109583, "f1": 0.603225806451613, "f1_std": 0.08810129387192715, "bacc": 0.603225806451613, "bacc_std": 0.08752916350622743} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 61, "C": 0.3593813663804626, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06842028149379875, "f1": 0.5839188134270101, "f1_std": 0.08374608357767484, "bacc": 0.5870967741935484, "bacc_std": 0.08589112993710729} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 62, "C": 21.54434690031882, "split": "test", "acc": 0.6341463414634146, "acc_std": 0.07282853315101176, "f1": 0.5467943994104643, "f1_std": 0.08142039906497205, "bacc": 0.5548387096774194, "bacc_std": 0.08930133923041397} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 63, "C": 0.046415888336127774, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.04391530976647873, "f1": 0.7144278606965174, "f1_std": 0.08845070658018393, "bacc": 0.6838709677419355, "bacc_std": 0.0764171080430615} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 64, "C": 0.046415888336127774, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06632300675829644, "f1": 0.6693548387096775, "f1_std": 0.0886367299935812, "bacc": 0.6693548387096775, "bacc_std": 0.08978233153104725} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 65, "C": 2.782559402207126, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06956726958150349, "f1": 0.6479313036690086, "f1_std": 0.08813981143556328, "bacc": 0.6532258064516129, "bacc_std": 0.09149851536640352} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 66, "C": 0.046415888336127774, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.05762499540287565, "f1": 0.7354838709677419, "f1_std": 0.07844391618614595, "bacc": 0.7354838709677419, "bacc_std": 0.08011970206964979} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 67, "C": 166.81005372000556, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.05948747987391094, "f1": 0.7515151515151515, "f1_std": 0.07432930389051075, "bacc": 0.7693548387096774, "bacc_std": 0.0796772610681463} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 68, "C": 2.782559402207126, "split": "test", "acc": 0.6341463414634146, "acc_std": 0.07154712486704155, "f1": 0.5858585858585859, "f1_std": 0.07514457644915817, "bacc": 0.6225806451612903, "bacc_std": 0.08901446356770105} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 69, "C": 166.81005372000556, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.0631631787555507, "f1": 0.6693548387096775, "f1_std": 0.08661260365606353, "bacc": 0.6693548387096775, "bacc_std": 0.08860881397847628} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 70, "C": 166.81005372000556, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.05063901005824227, "f1": 0.6328358208955224, "f1_std": 0.0916985547020345, "bacc": 0.6177419354838709, "bacc_std": 0.07604860459039127} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 71, "C": 0.046415888336127774, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.025273000785603236, "f1": 0.4225352112676056, "f1_std": 0.008585721562303125, "bacc": 0.4838709677419355, "bacc_std": 0.016712790842092467} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 72, "C": 166.81005372000556, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.05495937605233404, "f1": 0.5340909090909092, "f1_std": 0.08226291132227315, "bacc": 0.535483870967742, "bacc_std": 0.06953163744382226} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 73, "C": 166.81005372000556, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06565532917183121, "f1": 0.6479313036690086, "f1_std": 0.08484545509149703, "bacc": 0.6532258064516129, "bacc_std": 0.08813551875954169} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 74, "C": 0.3593813663804626, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.05464538211991503, "f1": 0.6117424242424243, "f1_std": 0.09329905780675721, "bacc": 0.6016129032258064, "bacc_std": 0.08098409319537003} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 75, "C": 0.005994842503189409, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.02458149408150368, "f1": 0.5275288092189501, "f1_std": 0.0853385069223417, "bacc": 0.55, "bacc_std": 0.05039206286708255} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 76, "C": 21.54434690031882, "split": "test", "acc": 0.6341463414634146, "acc_std": 0.07067112628522604, "f1": 0.5199063231850116, "f1_std": 0.08142168507680905, "bacc": 0.5209677419354839, "bacc_std": 0.08386952132747816} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 77, "C": 2.782559402207126, "split": "test", "acc": 0.8536585365853658, "acc_std": 0.03846971660639475, "f1": 0.7415966386554622, "f1_std": 0.09319016829639222, "bacc": 0.7, "bacc_std": 0.07886291904310923} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 78, "C": 2.782559402207126, "split": "test", "acc": 0.6341463414634146, "acc_std": 0.0736405082120724, "f1": 0.5467943994104643, "f1_std": 0.08266790175095967, "bacc": 0.5548387096774194, "bacc_std": 0.08867424623806902} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 79, "C": 0.3593813663804626, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.06445665459234878, "f1": 0.7410526315789474, "f1_std": 0.07149786604153614, "bacc": 0.7870967741935484, "bacc_std": 0.07699930375240986} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 80, "C": 21.54434690031882, "split": "test", "acc": 0.6097560975609756, "acc_std": 0.07679908040298142, "f1": 0.5287356321839081, "f1_std": 0.08240818878294155, "bacc": 0.5387096774193548, "bacc_std": 0.09045223844751711} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 81, "C": 0.046415888336127774, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.03302103956774291, "f1": 0.6095238095238095, "f1_std": 0.09927304418881826, "bacc": 0.6, "bacc_std": 0.06769313111387298} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 82, "C": 2.782559402207126, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.061994530663944156, "f1": 0.603225806451613, "f1_std": 0.0841189340615236, "bacc": 0.603225806451613, "bacc_std": 0.08518020415578265} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 83, "C": 166.81005372000556, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.0710317605395921, "f1": 0.6072218128224024, "f1_std": 0.07929481985323616, "bacc": 0.6209677419354839, "bacc_std": 0.08593298764901124} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 84, "C": 166.81005372000556, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.0638443042208122, "f1": 0.6232247284878863, "f1_std": 0.08908878955611359, "bacc": 0.6193548387096774, "bacc_std": 0.0862795960170524} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 85, "C": 0.005994842503189409, "split": "test", "acc": 0.9024390243902439, "acc_std": 0.03849964268165498, "f1": 0.8446969696969697, "f1_std": 0.07720031285867424, "bacc": 0.8, "bacc_std": 0.07892426749739272} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 86, "C": 10000.0, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.07025616424144066, "f1": 0.5651515151515152, "f1_std": 0.08384720018560972, "bacc": 0.5709677419354839, "bacc_std": 0.09059275238450024} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 87, "C": 0.005994842503189409, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.0225593275601093, "f1": 0.5275288092189501, "f1_std": 0.07957919813043343, "bacc": 0.55, "bacc_std": 0.04624662149822406} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 88, "C": 21.54434690031882, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.07048677524777579, "f1": 0.6272727272727273, "f1_std": 0.08430880006797126, "bacc": 0.6370967741935484, "bacc_std": 0.09023509236356196} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 89, "C": 0.046415888336127774, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.0598444145762876, "f1": 0.6917293233082706, "f1_std": 0.08678045300301061, "bacc": 0.685483870967742, "bacc_std": 0.08642448996772303} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 90, "C": 21.54434690031882, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.07079600105666778, "f1": 0.646551724137931, "f1_std": 0.07889390833819296, "bacc": 0.6709677419354838, "bacc_std": 0.0868913146517802} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 91, "C": 0.046415888336127774, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.04846178818143033, "f1": 0.4831932773109243, "f1_std": 0.0719938283592637, "bacc": 0.5016129032258064, "bacc_std": 0.05586339617547491} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 92, "C": 0.046415888336127774, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.056855432059477296, "f1": 0.5340909090909092, "f1_std": 0.08853637640425947, "bacc": 0.535483870967742, "bacc_std": 0.07447375271807237} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 93, "C": 166.81005372000556, "split": "test", "acc": 0.5853658536585366, "acc_std": 0.07923074532877976, "f1": 0.5465191932335719, "f1_std": 0.07900119114548598, "bacc": 0.5903225806451613, "bacc_std": 0.0940853653494785} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 94, "C": 0.046415888336127774, "split": "test", "acc": 0.5853658536585366, "acc_std": 0.06559297999321455, "f1": 0.4177109440267335, "f1_std": 0.061108753747717834, "bacc": 0.42096774193548386, "bacc_std": 0.062271549935115154} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 95, "C": 21.54434690031882, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06674311698209355, "f1": 0.646551724137931, "f1_std": 0.07480127684367027, "bacc": 0.6709677419354838, "bacc_std": 0.08202326362218301} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 96, "C": 166.81005372000556, "split": "test", "acc": 0.6341463414634146, "acc_std": 0.06923137112873086, "f1": 0.5684210526315789, "f1_std": 0.0753653572367766, "bacc": 0.5887096774193548, "bacc_std": 0.08614099228305212} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 97, "C": 0.3593813663804626, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06333599251334274, "f1": 0.6693548387096775, "f1_std": 0.0868502634465662, "bacc": 0.6693548387096775, "bacc_std": 0.08915521401030746} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 98, "C": 0.005994842503189409, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.057396934503702965, "f1": 0.5918552036199095, "f1_std": 0.08990367053169612, "bacc": 0.5854838709677419, "bacc_std": 0.08000736666134717} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 99, "C": 0.046415888336127774, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.04633951215404192, "f1": 0.6328358208955224, "f1_std": 0.09129225806517206, "bacc": 0.6177419354838709, "bacc_std": 0.07412600537069593} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 100, "C": 0.046415888336127774, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.052819583374581494, "f1": 0.5340909090909092, "f1_std": 0.08066308292807091, "bacc": 0.535483870967742, "bacc_std": 0.06858881775417705} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:--------------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | adni_ad_vs_cn | train | 100 | 239.25 | 1408.3 | 0.95084 | 0.054517 | 0.92202 | 0.090557 | 0.90785 | 0.1033 | +| flat_mae | patch | logistic | adni_ad_vs_cn | test | 100 | 239.25 | 1408.3 | 0.73927 | 0.063894 | 0.62514 | 0.080067 | 0.62537 | 0.072669 | + + +done! total time: 0:04:31 diff --git a/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/config.yaml b/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..349d0749468e1111e37d2ebee11aeeb3d2baf75d --- /dev/null +++ b/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/config.yaml @@ -0,0 +1,96 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_probe +remote_root: null +notes: data scaling experiment n1600_1; eval v2 (hcpya_task21 patch attn) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: true + norm: true +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn +model: flat_mae +representation: patch +classifier: attn +dataset: hcpya_task21 +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn +remote_dir: null diff --git a/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/eval_log.json b/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/eval_log.json new file mode 100644 index 0000000000000000000000000000000000000000..cbc0e2a4cb9dd4044faf0be2a5d285726f8c7570 --- /dev/null +++ b/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/eval_log.json @@ -0,0 +1 @@ +{"eval/epoch": 14, "eval/id_best": 35, "eval/lr_best": 0.0018, "eval/wd_best": 0.05, "eval/train/loss": 4.079829159309156e-05, "eval/train/acc": 1.0, "eval/train/acc_std": 0.0, "eval/train/f1": 1.0, "eval/train/f1_std": 0.0, "eval/validation/loss": 0.04726891964673996, "eval/validation/acc": 0.9923115079365079, "eval/validation/acc_std": 0.001394593143699758, "eval/validation/f1": 0.9909580918726532, "eval/validation/f1_std": 0.0018312636287125701, "eval/test/loss": 0.0648389458656311, "eval/test/acc": 0.9900793650793651, "eval/test/acc_std": 0.0013837034151484417, "eval/test/f1": 0.9885227082201382, "eval/test/f1_std": 0.001718861469679251} diff --git a/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/eval_log_best.json b/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/eval_log_best.json new file mode 100644 index 0000000000000000000000000000000000000000..f5c97fc882f06c048de6bc27dbc428d6fd41d0c4 --- /dev/null +++ b/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/eval_log_best.json @@ -0,0 +1 @@ +{"eval/best/epoch": 14, "eval/best/id_best": 35, "eval/best/lr_best": 0.0018, "eval/best/wd_best": 0.05, "eval/best/train/loss": 4.079829159309156e-05, "eval/best/train/acc": 1.0, "eval/best/train/acc_std": 0.0, "eval/best/train/f1": 1.0, "eval/best/train/f1_std": 0.0, "eval/best/validation/loss": 0.04726891964673996, "eval/best/validation/acc": 0.9923115079365079, "eval/best/validation/acc_std": 0.001394593143699758, "eval/best/validation/f1": 0.9909580918726532, "eval/best/validation/f1_std": 0.0018312636287125701, "eval/best/test/loss": 0.0648389458656311, "eval/best/test/acc": 0.9900793650793651, "eval/best/test/acc_std": 0.0013837034151484417, "eval/best/test/f1": 0.9885227082201382, "eval/best/test/f1_std": 0.001718861469679251} diff --git a/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/eval_log_last.json b/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/eval_log_last.json new file mode 100644 index 0000000000000000000000000000000000000000..de3eef7495a54714191be1e040b76778e5e98258 --- /dev/null +++ b/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/eval_log_last.json @@ -0,0 +1 @@ +{"eval/last/epoch": 19, "eval/last/id_best": 35, "eval/last/lr_best": 0.0018, "eval/last/wd_best": 0.05, "eval/last/train/loss": 3.980837209383026e-05, "eval/last/train/acc": 1.0, "eval/last/train/acc_std": 0.0, "eval/last/train/f1": 1.0, "eval/last/train/f1_std": 0.0, "eval/last/validation/loss": 0.04662461578845978, "eval/last/validation/acc": 0.9923115079365079, "eval/last/validation/acc_std": 0.001377199247868492, "eval/last/validation/f1": 0.9910555649604573, "eval/last/validation/f1_std": 0.0018081433238288382, "eval/last/test/loss": 0.06386362761259079, "eval/last/test/acc": 0.9902777777777778, "eval/last/test/acc_std": 0.0013742425018964562, "eval/last/test/f1": 0.9886659786745963, "eval/last/test/f1_std": 0.0017185115791900094} diff --git a/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/eval_table.csv b/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..2965e783e7b8459d7d26eceb460aea2de9ae233c --- /dev/null +++ b/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/eval_table.csv @@ -0,0 +1,4 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,hcpya_task21,best,14,0.0018,0.05,35,"[6, 1.0]",train,4.079829159309156e-05,1.0,0.0,1.0,0.0 +flat_mae,patch,attn,hcpya_task21,best,14,0.0018,0.05,35,"[6, 1.0]",validation,0.04726891964673996,0.9923115079365079,0.001394593143699758,0.9909580918726532,0.0018312636287125701 +flat_mae,patch,attn,hcpya_task21,best,14,0.0018,0.05,35,"[6, 1.0]",test,0.0648389458656311,0.9900793650793651,0.0013837034151484417,0.9885227082201382,0.001718861469679251 diff --git a/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/eval_table_best.csv b/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/eval_table_best.csv new file mode 100644 index 0000000000000000000000000000000000000000..2965e783e7b8459d7d26eceb460aea2de9ae233c --- /dev/null +++ b/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/eval_table_best.csv @@ -0,0 +1,4 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,hcpya_task21,best,14,0.0018,0.05,35,"[6, 1.0]",train,4.079829159309156e-05,1.0,0.0,1.0,0.0 +flat_mae,patch,attn,hcpya_task21,best,14,0.0018,0.05,35,"[6, 1.0]",validation,0.04726891964673996,0.9923115079365079,0.001394593143699758,0.9909580918726532,0.0018312636287125701 +flat_mae,patch,attn,hcpya_task21,best,14,0.0018,0.05,35,"[6, 1.0]",test,0.0648389458656311,0.9900793650793651,0.0013837034151484417,0.9885227082201382,0.001718861469679251 diff --git a/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/eval_table_last.csv b/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/eval_table_last.csv new file mode 100644 index 0000000000000000000000000000000000000000..44427c1cb5bc4919cd396cafbe8e517464bae233 --- /dev/null +++ b/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/eval_table_last.csv @@ -0,0 +1,4 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,hcpya_task21,last,19,0.0018,0.05,35,"[6, 1.0]",train,3.980837209383026e-05,1.0,0.0,1.0,0.0 +flat_mae,patch,attn,hcpya_task21,last,19,0.0018,0.05,35,"[6, 1.0]",validation,0.04662461578845978,0.9923115079365079,0.001377199247868492,0.9910555649604573,0.0018081433238288382 +flat_mae,patch,attn,hcpya_task21,last,19,0.0018,0.05,35,"[6, 1.0]",test,0.06386362761259079,0.9902777777777778,0.0013742425018964562,0.9886659786745963,0.0017185115791900094 diff --git a/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/log.txt b/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..b9fc59442dc00efba1bbc3f00bd517c0a27f6fbe --- /dev/null +++ b/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/log.txt @@ -0,0 +1,886 @@ +fMRI foundation model probe eval +version: 0.1.dev65+g4003a1397 +sha: 6c01b606db98add5848cecd23e5d599250c0bf86, status: clean, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-24 19:47:30 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_probe +remote_root: null +notes: data scaling experiment n1600_1; eval v2 (hcpya_task21 patch attn) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: true + norm: true +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn +model: flat_mae +representation: patch +classifier: attn +dataset: hcpya_task21 +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: hcpya_task21 (flat) +train (n=18999): +HFDataset( + dataset=Dataset({ + features: ['sub', 'task', 'cond', 'cond_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 18999 +}), + labels=[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20], + counts=[ 832 1248 3201 1660 832 832 832 832 832 1248 1247 1243 832 416 + 416 416 416 416 416 416 416] +) + +validation (n=4032): +HFDataset( + dataset=Dataset({ + features: ['sub', 'task', 'cond', 'cond_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 4032 +}), + labels=[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20], + counts=[176 264 688 352 176 176 176 176 176 264 264 264 176 88 88 88 88 88 + 88 88 88] +) + +test (n=5040): +HFDataset( + dataset=Dataset({ + features: ['sub', 'task', 'cond', 'cond_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 5040 +}), + labels=[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20], + counts=[220 330 860 440 220 220 220 220 220 330 330 330 220 110 110 110 110 110 + 110 110 110] +) + +running backbone on example batch to get embedding dim +embedding feature dim (patch): 768 +initializing sweep of classifier heads +classifiers: +ModuleList( + (0-48): 49 x AttnPoolClassifier( + (kv): Linear(in_features=768, out_features=1536, bias=True) + (linear): Linear(in_features=768, out_features=21, bias=True) + ) +) +classifier params (train): 58.7M (58.7M) +setting up optimizer +total batch size: 128 = 64 bs per gpu x 2 accum +lr: 3.00e-04 +full schedule: epochs = 20 (steps = 4000) (decay = True) +warmup: epochs = 5 (steps = 1000) +start training for 20 epochs +train: [0] [ 0/400] eta: 0:23:43 lr: nan time: 3.5599 data: 2.9386 max mem: 21740 +train: [0] [ 20/400] eta: 0:04:00 lr: 0.000003 loss: 3.0615 (3.0652) grad: 0.2631 (0.2762) time: 0.4861 data: 0.0053 max mem: 22446 +train: [0] [ 40/400] eta: 0:03:17 lr: 0.000006 loss: 3.0250 (3.0250) grad: 0.2631 (0.2747) time: 0.4608 data: 0.0033 max mem: 22446 +train: [0] [ 60/400] eta: 0:02:57 lr: 0.000009 loss: 2.9128 (2.9658) grad: 0.2622 (0.2703) time: 0.4675 data: 0.0032 max mem: 22446 +train: [0] [ 80/400] eta: 0:02:43 lr: 0.000012 loss: 2.7791 (2.9031) grad: 0.2532 (0.2634) time: 0.4825 data: 0.0035 max mem: 22446 +train: [0] [100/400] eta: 0:02:30 lr: 0.000015 loss: 2.6150 (2.8343) grad: 0.2359 (0.2589) time: 0.4631 data: 0.0031 max mem: 22446 +train: [0] [120/400] eta: 0:02:18 lr: 0.000018 loss: 2.5045 (2.7646) grad: 0.2359 (0.2540) time: 0.4620 data: 0.0033 max mem: 22446 +train: [0] [140/400] eta: 0:02:08 lr: 0.000021 loss: 2.3451 (2.6970) grad: 0.2321 (0.2521) time: 0.4721 data: 0.0036 max mem: 22446 +train: [0] [160/400] eta: 0:01:57 lr: 0.000024 loss: 2.2379 (2.6362) grad: 0.2246 (0.2474) time: 0.4627 data: 0.0033 max mem: 22446 +train: [0] [180/400] eta: 0:01:46 lr: 0.000027 loss: 2.1373 (2.5754) grad: 0.2068 (0.2430) time: 0.4655 data: 0.0034 max mem: 22446 +train: [0] [200/400] eta: 0:01:36 lr: 0.000030 loss: 2.0759 (2.5178) grad: 0.2057 (0.2399) time: 0.4605 data: 0.0033 max mem: 22446 +train: [0] [220/400] eta: 0:01:26 lr: 0.000033 loss: 1.9509 (2.4638) grad: 0.2009 (0.2362) time: 0.4623 data: 0.0032 max mem: 22446 +train: [0] [240/400] eta: 0:01:16 lr: 0.000036 loss: 1.8583 (2.4098) grad: 0.2017 (0.2340) time: 0.4608 data: 0.0032 max mem: 22446 +train: [0] [260/400] eta: 0:01:07 lr: 0.000039 loss: 1.7905 (2.3610) grad: 0.2057 (0.2316) time: 0.4669 data: 0.0034 max mem: 22446 +train: [0] [280/400] eta: 0:00:57 lr: 0.000042 loss: 1.7632 (2.3171) grad: 0.1906 (0.2283) time: 0.4573 data: 0.0034 max mem: 22446 +train: [0] [300/400] eta: 0:00:48 lr: 0.000045 loss: 1.7106 (2.2747) grad: 0.1743 (0.2249) time: 0.6335 data: 0.1722 max mem: 22446 +train: [0] [320/400] eta: 0:00:38 lr: 0.000048 loss: 1.6594 (2.2333) grad: 0.1732 (0.2219) time: 0.4712 data: 0.0029 max mem: 22446 +train: [0] [340/400] eta: 0:00:29 lr: 0.000051 loss: 1.5760 (2.1942) grad: 0.1818 (0.2199) time: 0.4634 data: 0.0033 max mem: 22446 +train: [0] [360/400] eta: 0:00:19 lr: 0.000054 loss: 1.5491 (2.1580) grad: 0.1786 (0.2176) time: 0.4635 data: 0.0034 max mem: 22446 +train: [0] [380/400] eta: 0:00:09 lr: 0.000057 loss: 1.5176 (2.1229) grad: 0.1742 (0.2151) time: 0.4655 data: 0.0033 max mem: 22446 +train: [0] [399/400] eta: 0:00:00 lr: 0.000060 loss: 1.4551 (2.0882) grad: 0.1696 (0.2132) time: 0.4619 data: 0.0034 max mem: 22446 +train: [0] Total time: 0:03:12 (0.4825 s / it) +train: [0] Summary: lr: 0.000060 loss: 1.4551 (2.0882) grad: 0.1696 (0.2132) +eval (validation): [0] [ 0/63] eta: 0:03:22 time: 3.2190 data: 2.9326 max mem: 22446 +eval (validation): [0] [20/63] eta: 0:00:21 time: 0.3698 data: 0.0033 max mem: 22446 +eval (validation): [0] [40/63] eta: 0:00:09 time: 0.3322 data: 0.0030 max mem: 22446 +eval (validation): [0] [60/63] eta: 0:00:01 time: 0.3314 data: 0.0031 max mem: 22446 +eval (validation): [0] [62/63] eta: 0:00:00 time: 0.3310 data: 0.0031 max mem: 22446 +eval (validation): [0] Total time: 0:00:24 (0.3942 s / it) +cv: [0] best hparam: (14, 1.0) (040) ('040_lr1.4e+01_wd1.0e+00') loss: 0.049 acc: 0.984 f1: 0.981 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [1] [ 0/400] eta: 0:22:28 lr: nan time: 3.3707 data: 2.9630 max mem: 22446 +train: [1] [ 20/400] eta: 0:03:50 lr: 0.000063 loss: 1.3942 (1.4149) grad: 0.1684 (0.1736) time: 0.4696 data: 0.0036 max mem: 22446 +train: [1] [ 40/400] eta: 0:03:11 lr: 0.000066 loss: 1.3930 (1.3923) grad: 0.1700 (0.1721) time: 0.4550 data: 0.0033 max mem: 22446 +train: [1] [ 60/400] eta: 0:02:52 lr: 0.000069 loss: 1.3498 (1.3711) grad: 0.1650 (0.1682) time: 0.4538 data: 0.0033 max mem: 22446 +train: [1] [ 80/400] eta: 0:02:39 lr: 0.000072 loss: 1.3147 (1.3547) grad: 0.1558 (0.1662) time: 0.4780 data: 0.0033 max mem: 22446 +train: [1] [100/400] eta: 0:02:28 lr: 0.000075 loss: 1.2952 (1.3424) grad: 0.1595 (0.1654) time: 0.4679 data: 0.0034 max mem: 22446 +train: [1] [120/400] eta: 0:02:16 lr: 0.000078 loss: 1.2571 (1.3233) grad: 0.1537 (0.1637) time: 0.4508 data: 0.0034 max mem: 22446 +train: [1] [140/400] eta: 0:02:05 lr: 0.000081 loss: 1.2158 (1.3078) grad: 0.1487 (0.1619) time: 0.4488 data: 0.0035 max mem: 22446 +train: [1] [160/400] eta: 0:01:54 lr: 0.000084 loss: 1.2008 (1.2913) grad: 0.1450 (0.1604) time: 0.4552 data: 0.0033 max mem: 22446 +train: [1] [180/400] eta: 0:01:44 lr: 0.000087 loss: 1.1780 (1.2773) grad: 0.1480 (0.1597) time: 0.4588 data: 0.0034 max mem: 22446 +train: [1] [200/400] eta: 0:01:34 lr: 0.000090 loss: 1.1451 (1.2629) grad: 0.1461 (0.1587) time: 0.4624 data: 0.0035 max mem: 22446 +train: [1] [220/400] eta: 0:01:24 lr: 0.000093 loss: 1.1048 (1.2478) grad: 0.1498 (0.1590) time: 0.4492 data: 0.0034 max mem: 22446 +train: [1] [240/400] eta: 0:01:15 lr: 0.000096 loss: 1.0936 (1.2343) grad: 0.1549 (0.1581) time: 0.4738 data: 0.0034 max mem: 22446 +train: [1] [260/400] eta: 0:01:06 lr: 0.000099 loss: 1.0840 (1.2223) grad: 0.1484 (0.1573) time: 0.4638 data: 0.0035 max mem: 22446 +train: [1] [280/400] eta: 0:00:56 lr: 0.000102 loss: 1.0619 (1.2094) grad: 0.1418 (0.1567) time: 0.4585 data: 0.0036 max mem: 22446 +train: [1] [300/400] eta: 0:00:48 lr: 0.000105 loss: 1.0303 (1.1969) grad: 0.1368 (0.1553) time: 0.6172 data: 0.1717 max mem: 22446 +train: [1] [320/400] eta: 0:00:38 lr: 0.000108 loss: 1.0117 (1.1852) grad: 0.1368 (0.1544) time: 0.4794 data: 0.0032 max mem: 22446 +train: [1] [340/400] eta: 0:00:28 lr: 0.000111 loss: 0.9867 (1.1725) grad: 0.1378 (0.1532) time: 0.4590 data: 0.0035 max mem: 22446 +train: [1] [360/400] eta: 0:00:19 lr: 0.000114 loss: 0.9900 (1.1622) grad: 0.1335 (0.1522) time: 0.4525 data: 0.0034 max mem: 22446 +train: [1] [380/400] eta: 0:00:09 lr: 0.000117 loss: 0.9685 (1.1515) grad: 0.1389 (0.1518) time: 0.4668 data: 0.0033 max mem: 22446 +train: [1] [399/400] eta: 0:00:00 lr: 0.000120 loss: 0.9377 (1.1422) grad: 0.1442 (0.1513) time: 0.4642 data: 0.0034 max mem: 22446 +train: [1] Total time: 0:03:10 (0.4768 s / it) +train: [1] Summary: lr: 0.000120 loss: 0.9377 (1.1422) grad: 0.1442 (0.1513) +eval (validation): [1] [ 0/63] eta: 0:03:19 time: 3.1607 data: 2.8810 max mem: 22446 +eval (validation): [1] [20/63] eta: 0:00:21 time: 0.3627 data: 0.0033 max mem: 22446 +eval (validation): [1] [40/63] eta: 0:00:09 time: 0.3411 data: 0.0032 max mem: 22446 +eval (validation): [1] [60/63] eta: 0:00:01 time: 0.3387 data: 0.0032 max mem: 22446 +eval (validation): [1] [62/63] eta: 0:00:00 time: 0.3374 data: 0.0032 max mem: 22446 +eval (validation): [1] Total time: 0:00:24 (0.3960 s / it) +cv: [1] best hparam: (19, 1.0) (042) ('042_lr1.9e+01_wd1.0e+00') loss: 0.035 acc: 0.990 f1: 0.988 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [2] [ 0/400] eta: 0:22:40 lr: nan time: 3.4010 data: 3.0533 max mem: 22446 +train: [2] [ 20/400] eta: 0:03:53 lr: 0.000123 loss: 0.9055 (0.9064) grad: 0.1517 (0.1576) time: 0.4746 data: 0.0035 max mem: 22446 +train: [2] [ 40/400] eta: 0:03:12 lr: 0.000126 loss: 0.9055 (0.9226) grad: 0.1575 (0.1652) time: 0.4526 data: 0.0029 max mem: 22446 +train: [2] [ 60/400] eta: 0:02:54 lr: 0.000129 loss: 0.8981 (0.9071) grad: 0.1617 (0.1646) time: 0.4691 data: 0.0034 max mem: 22446 +train: [2] [ 80/400] eta: 0:02:40 lr: 0.000132 loss: 0.8738 (0.9018) grad: 0.1578 (0.1629) time: 0.4703 data: 0.0034 max mem: 22446 +train: [2] [100/400] eta: 0:02:27 lr: 0.000135 loss: 0.8721 (0.8938) grad: 0.1516 (0.1614) time: 0.4536 data: 0.0035 max mem: 22446 +train: [2] [120/400] eta: 0:02:16 lr: 0.000138 loss: 0.8637 (0.8906) grad: 0.1499 (0.1617) time: 0.4603 data: 0.0034 max mem: 22446 +train: [2] [140/400] eta: 0:02:05 lr: 0.000141 loss: 0.8459 (0.8854) grad: 0.1665 (0.1634) time: 0.4634 data: 0.0034 max mem: 22446 +train: [2] [160/400] eta: 0:01:55 lr: 0.000144 loss: 0.8275 (0.8840) grad: 0.1705 (0.1675) time: 0.4594 data: 0.0034 max mem: 22446 +train: [2] [180/400] eta: 0:01:45 lr: 0.000147 loss: 0.8726 (0.8837) grad: 0.1724 (0.1699) time: 0.4787 data: 0.0034 max mem: 22446 +train: [2] [200/400] eta: 0:01:35 lr: 0.000150 loss: 0.8188 (0.8766) grad: 0.1788 (0.1722) time: 0.4651 data: 0.0034 max mem: 22446 +train: [2] [220/400] eta: 0:01:25 lr: 0.000153 loss: 0.8188 (0.8769) grad: 0.1726 (0.1733) time: 0.4570 data: 0.0032 max mem: 22446 +train: [2] [240/400] eta: 0:01:16 lr: 0.000156 loss: 0.8289 (0.8719) grad: 0.1848 (0.1752) time: 0.4828 data: 0.0033 max mem: 22446 +train: [2] [260/400] eta: 0:01:06 lr: 0.000159 loss: 0.7858 (0.8660) grad: 0.1854 (0.1767) time: 0.4696 data: 0.0033 max mem: 22446 +train: [2] [280/400] eta: 0:00:57 lr: 0.000162 loss: 0.7879 (0.8636) grad: 0.1886 (0.1811) time: 0.4624 data: 0.0033 max mem: 22446 +train: [2] [300/400] eta: 0:00:48 lr: 0.000165 loss: 0.8053 (0.8604) grad: 0.2151 (0.1839) time: 0.6173 data: 0.1749 max mem: 22446 +train: [2] [320/400] eta: 0:00:38 lr: 0.000168 loss: 0.7675 (0.8577) grad: 0.2229 (0.1881) time: 0.4763 data: 0.0031 max mem: 22446 +train: [2] [340/400] eta: 0:00:28 lr: 0.000171 loss: 0.7873 (0.8560) grad: 0.2425 (0.1923) time: 0.4514 data: 0.0029 max mem: 22446 +train: [2] [360/400] eta: 0:00:19 lr: 0.000174 loss: 0.7873 (0.8544) grad: 0.2089 (0.1942) time: 0.4725 data: 0.0034 max mem: 22446 +train: [2] [380/400] eta: 0:00:09 lr: 0.000177 loss: 0.7804 (0.8498) grad: 0.2004 (0.1964) time: 0.4731 data: 0.0034 max mem: 22446 +train: [2] [399/400] eta: 0:00:00 lr: 0.000180 loss: 0.6942 (0.8415) grad: 0.2348 (0.1983) time: 0.4575 data: 0.0034 max mem: 22446 +train: [2] Total time: 0:03:12 (0.4809 s / it) +train: [2] Summary: lr: 0.000180 loss: 0.6942 (0.8415) grad: 0.2348 (0.1983) +eval (validation): [2] [ 0/63] eta: 0:03:23 time: 3.2317 data: 2.9497 max mem: 22446 +eval (validation): [2] [20/63] eta: 0:00:21 time: 0.3694 data: 0.0035 max mem: 22446 +eval (validation): [2] [40/63] eta: 0:00:09 time: 0.3362 data: 0.0029 max mem: 22446 +eval (validation): [2] [60/63] eta: 0:00:01 time: 0.3354 data: 0.0032 max mem: 22446 +eval (validation): [2] [62/63] eta: 0:00:00 time: 0.3299 data: 0.0031 max mem: 22446 +eval (validation): [2] Total time: 0:00:24 (0.3965 s / it) +cv: [2] best hparam: (5.1, 1.0) (034) ('034_lr5.1e+00_wd1.0e+00') loss: 0.031 acc: 0.990 f1: 0.988 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [3] [ 0/400] eta: 0:22:50 lr: nan time: 3.4275 data: 3.0222 max mem: 22446 +train: [3] [ 20/400] eta: 0:04:00 lr: 0.000183 loss: 0.6316 (0.6963) grad: 0.2497 (0.2378) time: 0.4919 data: 0.0038 max mem: 22446 +train: [3] [ 40/400] eta: 0:03:15 lr: 0.000186 loss: 0.6757 (0.7118) grad: 0.2095 (0.2219) time: 0.4485 data: 0.0030 max mem: 22446 +train: [3] [ 60/400] eta: 0:02:57 lr: 0.000189 loss: 0.7155 (0.7493) grad: 0.2021 (0.2308) time: 0.4821 data: 0.0035 max mem: 22446 +train: [3] [ 80/400] eta: 0:02:43 lr: 0.000192 loss: 0.7283 (0.7527) grad: 0.2187 (0.2237) time: 0.4712 data: 0.0034 max mem: 22446 +train: [3] [100/400] eta: 0:02:30 lr: 0.000195 loss: 0.6979 (0.7469) grad: 0.2176 (0.2233) time: 0.4635 data: 0.0034 max mem: 22446 +train: [3] [120/400] eta: 0:02:18 lr: 0.000198 loss: 0.6886 (0.7419) grad: 0.2058 (0.2206) time: 0.4559 data: 0.0034 max mem: 22446 +train: [3] [140/400] eta: 0:02:06 lr: 0.000201 loss: 0.6823 (0.7448) grad: 0.2058 (0.2232) time: 0.4555 data: 0.0036 max mem: 22446 +train: [3] [160/400] eta: 0:01:56 lr: 0.000204 loss: 0.6789 (0.7438) grad: 0.2497 (0.2255) time: 0.4660 data: 0.0034 max mem: 22446 +train: [3] [180/400] eta: 0:01:46 lr: 0.000207 loss: 0.6547 (0.7409) grad: 0.2489 (0.2255) time: 0.4774 data: 0.0035 max mem: 22446 +train: [3] [200/400] eta: 0:01:36 lr: 0.000210 loss: 0.6623 (0.7425) grad: 0.2416 (0.2285) time: 0.4592 data: 0.0036 max mem: 22446 +train: [3] [220/400] eta: 0:01:26 lr: 0.000213 loss: 0.6376 (0.7347) grad: 0.2579 (0.2309) time: 0.4538 data: 0.0033 max mem: 22446 +train: [3] [240/400] eta: 0:01:16 lr: 0.000216 loss: 0.7146 (0.7401) grad: 0.2560 (0.2337) time: 0.4704 data: 0.0033 max mem: 22446 +train: [3] [260/400] eta: 0:01:06 lr: 0.000219 loss: 0.7101 (0.7358) grad: 0.2847 (0.2470) time: 0.4597 data: 0.0039 max mem: 22446 +train: [3] [280/400] eta: 0:00:57 lr: 0.000222 loss: 0.5887 (0.7332) grad: 0.3261 (0.2509) time: 0.4650 data: 0.0027 max mem: 22446 +train: [3] [300/400] eta: 0:00:48 lr: 0.000225 loss: 0.6781 (0.7353) grad: 0.2667 (0.2528) time: 0.6232 data: 0.1797 max mem: 22446 +train: [3] [320/400] eta: 0:00:38 lr: 0.000228 loss: 0.6536 (0.7301) grad: 0.2504 (0.2544) time: 0.4696 data: 0.0035 max mem: 22446 +train: [3] [340/400] eta: 0:00:28 lr: 0.000231 loss: 0.6189 (0.7209) grad: 0.2736 (0.2569) time: 0.4522 data: 0.0033 max mem: 22446 +train: [3] [360/400] eta: 0:00:19 lr: 0.000234 loss: 0.5759 (0.7124) grad: 0.2549 (0.2593) time: 0.4682 data: 0.0035 max mem: 22446 +train: [3] [380/400] eta: 0:00:09 lr: 0.000237 loss: 0.5358 (0.7091) grad: 0.2440 (0.2592) time: 0.4751 data: 0.0034 max mem: 22446 +train: [3] [399/400] eta: 0:00:00 lr: 0.000240 loss: 0.5818 (0.7053) grad: 0.2424 (0.2600) time: 0.4645 data: 0.0035 max mem: 22446 +train: [3] Total time: 0:03:12 (0.4813 s / it) +train: [3] Summary: lr: 0.000240 loss: 0.5818 (0.7053) grad: 0.2424 (0.2600) +eval (validation): [3] [ 0/63] eta: 0:03:20 time: 3.1832 data: 2.9015 max mem: 22446 +eval (validation): [3] [20/63] eta: 0:00:21 time: 0.3728 data: 0.0050 max mem: 22446 +eval (validation): [3] [40/63] eta: 0:00:09 time: 0.3400 data: 0.0032 max mem: 22446 +eval (validation): [3] [60/63] eta: 0:00:01 time: 0.3296 data: 0.0032 max mem: 22446 +eval (validation): [3] [62/63] eta: 0:00:00 time: 0.3297 data: 0.0031 max mem: 22446 +eval (validation): [3] Total time: 0:00:24 (0.3964 s / it) +cv: [3] best hparam: (1.2, 1.0) (025) ('025_lr1.2e+00_wd1.0e+00') loss: 0.037 acc: 0.990 f1: 0.988 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [4] [ 0/400] eta: 0:23:02 lr: nan time: 3.4552 data: 3.0872 max mem: 22446 +train: [4] [ 20/400] eta: 0:03:53 lr: 0.000243 loss: 0.7808 (0.8061) grad: 0.3217 (0.3055) time: 0.4737 data: 0.0030 max mem: 22446 +train: [4] [ 40/400] eta: 0:03:13 lr: 0.000246 loss: 0.6810 (0.7678) grad: 0.2772 (0.2854) time: 0.4529 data: 0.0031 max mem: 22446 +train: [4] [ 60/400] eta: 0:02:55 lr: 0.000249 loss: 0.6326 (0.7175) grad: 0.2632 (0.2923) time: 0.4783 data: 0.0034 max mem: 22446 +train: [4] [ 80/400] eta: 0:02:41 lr: 0.000252 loss: 0.5881 (0.6931) grad: 0.3298 (0.3043) time: 0.4623 data: 0.0035 max mem: 22446 +train: [4] [100/400] eta: 0:02:28 lr: 0.000255 loss: 0.6081 (0.6973) grad: 0.3330 (0.3114) time: 0.4527 data: 0.0035 max mem: 22446 +train: [4] [120/400] eta: 0:02:16 lr: 0.000258 loss: 0.6673 (0.7073) grad: 0.3387 (0.3181) time: 0.4614 data: 0.0037 max mem: 22446 +train: [4] [140/400] eta: 0:02:05 lr: 0.000261 loss: 0.5729 (0.7005) grad: 0.3322 (0.3176) time: 0.4562 data: 0.0035 max mem: 22446 +train: [4] [160/400] eta: 0:01:55 lr: 0.000264 loss: 0.8209 (0.7246) grad: 0.3322 (0.3346) time: 0.4608 data: 0.0034 max mem: 22446 +train: [4] [180/400] eta: 0:01:45 lr: 0.000267 loss: 0.8209 (0.7377) grad: 0.4328 (0.3521) time: 0.4692 data: 0.0034 max mem: 22446 +train: [4] [200/400] eta: 0:01:35 lr: 0.000270 loss: 0.8071 (0.7438) grad: 0.4881 (0.3675) time: 0.4601 data: 0.0035 max mem: 22446 +train: [4] [220/400] eta: 0:01:25 lr: 0.000273 loss: 0.7942 (0.7455) grad: 0.4202 (0.3733) time: 0.4617 data: 0.0032 max mem: 22446 +train: [4] [240/400] eta: 0:01:16 lr: 0.000276 loss: 0.6352 (0.7452) grad: 0.4071 (0.3756) time: 0.4740 data: 0.0034 max mem: 22446 +train: [4] [260/400] eta: 0:01:06 lr: 0.000279 loss: 0.7969 (0.7646) grad: 0.3995 (0.3765) time: 0.4593 data: 0.0033 max mem: 22446 +train: [4] [280/400] eta: 0:00:56 lr: 0.000282 loss: 0.9037 (0.7733) grad: 0.3867 (0.3797) time: 0.4564 data: 0.0035 max mem: 22446 +train: [4] [300/400] eta: 0:00:48 lr: 0.000285 loss: 0.7508 (0.7921) grad: 0.3956 (0.3861) time: 0.6222 data: 0.1763 max mem: 22446 +train: [4] [320/400] eta: 0:00:38 lr: 0.000288 loss: 0.6239 (0.7855) grad: 0.4413 (0.3966) time: 0.4664 data: 0.0032 max mem: 22446 +train: [4] [340/400] eta: 0:00:28 lr: 0.000291 loss: 0.6222 (0.7780) grad: 0.4413 (0.3967) time: 0.4473 data: 0.0033 max mem: 22446 +train: [4] [360/400] eta: 0:00:19 lr: 0.000294 loss: 0.7858 (0.7906) grad: 0.4469 (0.4005) time: 0.4625 data: 0.0033 max mem: 22446 +train: [4] [380/400] eta: 0:00:09 lr: 0.000297 loss: 0.8126 (0.7957) grad: 0.4335 (0.4030) time: 0.4647 data: 0.0035 max mem: 22446 +train: [4] [399/400] eta: 0:00:00 lr: 0.000300 loss: 0.7585 (0.7961) grad: 0.4237 (0.4058) time: 0.4591 data: 0.0037 max mem: 22446 +train: [4] Total time: 0:03:11 (0.4778 s / it) +train: [4] Summary: lr: 0.000300 loss: 0.7585 (0.7961) grad: 0.4237 (0.4058) +eval (validation): [4] [ 0/63] eta: 0:03:29 time: 3.3297 data: 3.0477 max mem: 22446 +eval (validation): [4] [20/63] eta: 0:00:20 time: 0.3438 data: 0.0033 max mem: 22446 +eval (validation): [4] [40/63] eta: 0:00:09 time: 0.3455 data: 0.0029 max mem: 22446 +eval (validation): [4] [60/63] eta: 0:00:01 time: 0.3260 data: 0.0032 max mem: 22446 +eval (validation): [4] [62/63] eta: 0:00:00 time: 0.3246 data: 0.0032 max mem: 22446 +eval (validation): [4] Total time: 0:00:24 (0.3906 s / it) +cv: [4] best hparam: (0.85, 1.0) (023) ('023_lr8.5e-01_wd1.0e+00') loss: 0.040 acc: 0.989 f1: 0.988 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [5] [ 0/400] eta: 0:24:41 lr: nan time: 3.7041 data: 3.3003 max mem: 22446 +train: [5] [ 20/400] eta: 0:04:00 lr: 0.000300 loss: 0.6089 (0.6933) grad: 0.4936 (0.4722) time: 0.4791 data: 0.0022 max mem: 22446 +train: [5] [ 40/400] eta: 0:03:15 lr: 0.000300 loss: 0.6341 (0.7543) grad: 0.5003 (0.4906) time: 0.4476 data: 0.0033 max mem: 22446 +train: [5] [ 60/400] eta: 0:02:56 lr: 0.000300 loss: 0.6341 (0.7815) grad: 0.4230 (0.4652) time: 0.4733 data: 0.0034 max mem: 22446 +train: [5] [ 80/400] eta: 0:02:42 lr: 0.000300 loss: 0.6069 (0.7700) grad: 0.3873 (0.4496) time: 0.4674 data: 0.0034 max mem: 22446 +train: [5] [100/400] eta: 0:02:29 lr: 0.000300 loss: 1.0125 (0.8811) grad: 0.4237 (0.4533) time: 0.4588 data: 0.0034 max mem: 22446 +train: [5] [120/400] eta: 0:02:17 lr: 0.000300 loss: 1.1011 (0.8754) grad: 0.4372 (0.4532) time: 0.4576 data: 0.0036 max mem: 22446 +train: [5] [140/400] eta: 0:02:06 lr: 0.000300 loss: 0.7646 (0.8689) grad: 0.4372 (0.4502) time: 0.4590 data: 0.0035 max mem: 22446 +train: [5] [160/400] eta: 0:01:55 lr: 0.000299 loss: 0.7056 (0.8892) grad: 0.4373 (0.4484) time: 0.4551 data: 0.0033 max mem: 22446 +train: [5] [180/400] eta: 0:01:45 lr: 0.000299 loss: 0.6857 (0.8670) grad: 0.4610 (0.4540) time: 0.4678 data: 0.0036 max mem: 22446 +train: [5] [200/400] eta: 0:01:35 lr: 0.000299 loss: 0.6094 (0.8710) grad: 0.4697 (0.4555) time: 0.4633 data: 0.0035 max mem: 22446 +train: [5] [220/400] eta: 0:01:25 lr: 0.000299 loss: 0.7168 (0.8815) grad: 0.4625 (0.4549) time: 0.4585 data: 0.0033 max mem: 22446 +train: [5] [240/400] eta: 0:01:16 lr: 0.000299 loss: 0.7518 (0.8684) grad: 0.4510 (0.4585) time: 0.4714 data: 0.0035 max mem: 22446 +train: [5] [260/400] eta: 0:01:06 lr: 0.000299 loss: 0.7796 (0.8788) grad: 0.5244 (0.4626) time: 0.4670 data: 0.0035 max mem: 22446 +train: [5] [280/400] eta: 0:00:56 lr: 0.000298 loss: 0.9394 (0.8917) grad: 0.5063 (0.4640) time: 0.4598 data: 0.0035 max mem: 22446 +train: [5] [300/400] eta: 0:00:48 lr: 0.000298 loss: 0.7866 (0.8780) grad: 0.4776 (0.4627) time: 0.6363 data: 0.1823 max mem: 22446 +train: [5] [320/400] eta: 0:00:38 lr: 0.000298 loss: 0.6111 (0.8665) grad: 0.4457 (0.4623) time: 0.4728 data: 0.0035 max mem: 22446 +train: [5] [340/400] eta: 0:00:29 lr: 0.000298 loss: 0.5623 (0.8622) grad: 0.4019 (0.4573) time: 0.4632 data: 0.0034 max mem: 22446 +train: [5] [360/400] eta: 0:00:19 lr: 0.000297 loss: 0.5527 (0.8573) grad: 0.4252 (0.4649) time: 0.4627 data: 0.0032 max mem: 22446 +train: [5] [380/400] eta: 0:00:09 lr: 0.000297 loss: 0.7087 (0.8505) grad: 0.4736 (0.4684) time: 0.4837 data: 0.0035 max mem: 22446 +train: [5] [399/400] eta: 0:00:00 lr: 0.000297 loss: 0.5345 (0.8370) grad: 0.4488 (0.4671) time: 0.4651 data: 0.0034 max mem: 22446 +train: [5] Total time: 0:03:12 (0.4819 s / it) +train: [5] Summary: lr: 0.000297 loss: 0.5345 (0.8370) grad: 0.4488 (0.4671) +eval (validation): [5] [ 0/63] eta: 0:03:28 time: 3.3038 data: 3.0106 max mem: 22446 +eval (validation): [5] [20/63] eta: 0:00:22 time: 0.3798 data: 0.0036 max mem: 22446 +eval (validation): [5] [40/63] eta: 0:00:10 time: 0.3583 data: 0.0033 max mem: 22446 +eval (validation): [5] [60/63] eta: 0:00:01 time: 0.3437 data: 0.0031 max mem: 22446 +eval (validation): [5] [62/63] eta: 0:00:00 time: 0.3458 data: 0.0031 max mem: 22446 +eval (validation): [5] Total time: 0:00:25 (0.4121 s / it) +cv: [5] best hparam: (1.6, 1.0) (027) ('027_lr1.6e+00_wd1.0e+00') loss: 0.034 acc: 0.991 f1: 0.989 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [6] [ 0/400] eta: 0:23:18 lr: nan time: 3.4959 data: 3.1404 max mem: 22446 +train: [6] [ 20/400] eta: 0:03:55 lr: 0.000296 loss: 0.5062 (0.5516) grad: 0.3707 (0.3865) time: 0.4771 data: 0.0033 max mem: 22446 +train: [6] [ 40/400] eta: 0:03:14 lr: 0.000296 loss: 0.4826 (0.5534) grad: 0.3737 (0.4646) time: 0.4574 data: 0.0023 max mem: 22446 +train: [6] [ 60/400] eta: 0:02:56 lr: 0.000296 loss: 0.4933 (0.5950) grad: 0.4463 (0.4548) time: 0.4775 data: 0.0035 max mem: 22446 +train: [6] [ 80/400] eta: 0:02:42 lr: 0.000295 loss: 0.7329 (0.6411) grad: 0.4499 (0.4469) time: 0.4747 data: 0.0034 max mem: 22446 +train: [6] [100/400] eta: 0:02:30 lr: 0.000295 loss: 0.6524 (0.6156) grad: 0.4353 (0.4481) time: 0.4641 data: 0.0034 max mem: 22446 +train: [6] [120/400] eta: 0:02:18 lr: 0.000295 loss: 0.4311 (0.5957) grad: 0.3787 (0.4404) time: 0.4734 data: 0.0033 max mem: 22446 +train: [6] [140/400] eta: 0:02:07 lr: 0.000294 loss: 0.4311 (0.5890) grad: 0.3628 (0.4335) time: 0.4612 data: 0.0034 max mem: 22446 +train: [6] [160/400] eta: 0:01:57 lr: 0.000294 loss: 0.4621 (0.5894) grad: 0.3895 (0.4296) time: 0.4692 data: 0.0035 max mem: 22446 +train: [6] [180/400] eta: 0:01:46 lr: 0.000293 loss: 0.5077 (0.5966) grad: 0.3655 (0.4222) time: 0.4691 data: 0.0035 max mem: 22446 +train: [6] [200/400] eta: 0:01:36 lr: 0.000293 loss: 0.4277 (0.6107) grad: 0.3666 (0.4162) time: 0.4656 data: 0.0035 max mem: 22446 +train: [6] [220/400] eta: 0:01:26 lr: 0.000292 loss: 0.4386 (0.6047) grad: 0.3850 (0.4098) time: 0.4640 data: 0.0036 max mem: 22446 +train: [6] [240/400] eta: 0:01:16 lr: 0.000292 loss: 0.4149 (0.5878) grad: 0.3112 (0.4038) time: 0.4707 data: 0.0035 max mem: 22446 +train: [6] [260/400] eta: 0:01:07 lr: 0.000291 loss: 0.4042 (0.5848) grad: 0.3732 (0.4033) time: 0.4630 data: 0.0034 max mem: 22446 +train: [6] [280/400] eta: 0:00:57 lr: 0.000291 loss: 0.5708 (0.5874) grad: 0.3666 (0.4016) time: 0.4644 data: 0.0034 max mem: 22446 +train: [6] [300/400] eta: 0:00:48 lr: 0.000290 loss: 0.3680 (0.5756) grad: 0.3181 (0.3972) time: 0.6462 data: 0.1776 max mem: 22446 +train: [6] [320/400] eta: 0:00:39 lr: 0.000290 loss: 0.4084 (0.5708) grad: 0.3414 (0.3943) time: 0.4739 data: 0.0033 max mem: 22446 +train: [6] [340/400] eta: 0:00:29 lr: 0.000289 loss: 0.4540 (0.5664) grad: 0.3424 (0.3918) time: 0.4566 data: 0.0032 max mem: 22446 +train: [6] [360/400] eta: 0:00:19 lr: 0.000288 loss: 0.3794 (0.5575) grad: 0.3270 (0.3872) time: 0.4789 data: 0.0034 max mem: 22446 +train: [6] [380/400] eta: 0:00:09 lr: 0.000288 loss: 0.3359 (0.5472) grad: 0.2890 (0.3803) time: 0.4818 data: 0.0035 max mem: 22446 +train: [6] [399/400] eta: 0:00:00 lr: 0.000287 loss: 0.2951 (0.5332) grad: 0.2620 (0.3793) time: 0.4603 data: 0.0034 max mem: 22446 +train: [6] Total time: 0:03:14 (0.4853 s / it) +train: [6] Summary: lr: 0.000287 loss: 0.2951 (0.5332) grad: 0.2620 (0.3793) +eval (validation): [6] [ 0/63] eta: 0:03:29 time: 3.3216 data: 3.0696 max mem: 22446 +eval (validation): [6] [20/63] eta: 0:00:22 time: 0.3724 data: 0.0034 max mem: 22446 +eval (validation): [6] [40/63] eta: 0:00:09 time: 0.3390 data: 0.0028 max mem: 22446 +eval (validation): [6] [60/63] eta: 0:00:01 time: 0.3441 data: 0.0032 max mem: 22446 +eval (validation): [6] [62/63] eta: 0:00:00 time: 0.3427 data: 0.0032 max mem: 22446 +eval (validation): [6] Total time: 0:00:25 (0.4038 s / it) +cv: [6] best hparam: (4.3, 1.0) (033) ('033_lr4.3e+00_wd1.0e+00') loss: 0.038 acc: 0.992 f1: 0.991 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [7] [ 0/400] eta: 0:22:46 lr: nan time: 3.4166 data: 3.0150 max mem: 22446 +train: [7] [ 20/400] eta: 0:03:50 lr: 0.000286 loss: 0.2954 (0.4110) grad: 0.2982 (0.3144) time: 0.4661 data: 0.0029 max mem: 22446 +train: [7] [ 40/400] eta: 0:03:08 lr: 0.000286 loss: 0.3125 (0.3709) grad: 0.2650 (0.2946) time: 0.4378 data: 0.0030 max mem: 22446 +train: [7] [ 60/400] eta: 0:02:53 lr: 0.000285 loss: 0.3125 (0.3772) grad: 0.2618 (0.2884) time: 0.4838 data: 0.0036 max mem: 22446 +train: [7] [ 80/400] eta: 0:02:38 lr: 0.000284 loss: 0.2842 (0.3584) grad: 0.2224 (0.2746) time: 0.4471 data: 0.0035 max mem: 22446 +train: [7] [100/400] eta: 0:02:25 lr: 0.000284 loss: 0.2678 (0.3502) grad: 0.2743 (0.2768) time: 0.4475 data: 0.0033 max mem: 22446 +train: [7] [120/400] eta: 0:02:14 lr: 0.000283 loss: 0.2426 (0.3355) grad: 0.2681 (0.2672) time: 0.4593 data: 0.0035 max mem: 22446 +train: [7] [140/400] eta: 0:02:04 lr: 0.000282 loss: 0.2426 (0.3345) grad: 0.2320 (0.2668) time: 0.4662 data: 0.0034 max mem: 22446 +train: [7] [160/400] eta: 0:01:54 lr: 0.000282 loss: 0.3038 (0.3383) grad: 0.2782 (0.2703) time: 0.4490 data: 0.0034 max mem: 22446 +train: [7] [180/400] eta: 0:01:44 lr: 0.000281 loss: 0.2405 (0.3426) grad: 0.3051 (0.2770) time: 0.4550 data: 0.0033 max mem: 22446 +train: [7] [200/400] eta: 0:01:34 lr: 0.000280 loss: 0.2456 (0.3453) grad: 0.3273 (0.2853) time: 0.4562 data: 0.0035 max mem: 22446 +train: [7] [220/400] eta: 0:01:24 lr: 0.000279 loss: 0.2456 (0.3404) grad: 0.3240 (0.2860) time: 0.4585 data: 0.0036 max mem: 22446 +train: [7] [240/400] eta: 0:01:15 lr: 0.000278 loss: 0.3105 (0.3531) grad: 0.3119 (0.2875) time: 0.4646 data: 0.0034 max mem: 22446 +train: [7] [260/400] eta: 0:01:05 lr: 0.000278 loss: 0.3600 (0.3524) grad: 0.2665 (0.2855) time: 0.4692 data: 0.0034 max mem: 22446 +train: [7] [280/400] eta: 0:00:56 lr: 0.000277 loss: 0.3066 (0.3642) grad: 0.2748 (0.2866) time: 0.4659 data: 0.0035 max mem: 22446 +train: [7] [300/400] eta: 0:00:47 lr: 0.000276 loss: 0.3549 (0.3688) grad: 0.3240 (0.2907) time: 0.6186 data: 0.1756 max mem: 22446 +train: [7] [320/400] eta: 0:00:38 lr: 0.000275 loss: 0.3411 (0.3713) grad: 0.3078 (0.2882) time: 0.4579 data: 0.0032 max mem: 22446 +train: [7] [340/400] eta: 0:00:28 lr: 0.000274 loss: 0.2346 (0.3637) grad: 0.2234 (0.2845) time: 0.4563 data: 0.0034 max mem: 22446 +train: [7] [360/400] eta: 0:00:19 lr: 0.000273 loss: 0.2045 (0.3574) grad: 0.2374 (0.2834) time: 0.4709 data: 0.0035 max mem: 22446 +train: [7] [380/400] eta: 0:00:09 lr: 0.000272 loss: 0.1774 (0.3479) grad: 0.2367 (0.2803) time: 0.4563 data: 0.0035 max mem: 22446 +train: [7] [399/400] eta: 0:00:00 lr: 0.000271 loss: 0.1732 (0.3444) grad: 0.2367 (0.2784) time: 0.4513 data: 0.0034 max mem: 22446 +train: [7] Total time: 0:03:09 (0.4746 s / it) +train: [7] Summary: lr: 0.000271 loss: 0.1732 (0.3444) grad: 0.2367 (0.2784) +eval (validation): [7] [ 0/63] eta: 0:03:18 time: 3.1513 data: 2.8546 max mem: 22446 +eval (validation): [7] [20/63] eta: 0:00:20 time: 0.3486 data: 0.0099 max mem: 22446 +eval (validation): [7] [40/63] eta: 0:00:09 time: 0.3514 data: 0.0031 max mem: 22446 +eval (validation): [7] [60/63] eta: 0:00:01 time: 0.3387 data: 0.0030 max mem: 22446 +eval (validation): [7] [62/63] eta: 0:00:00 time: 0.3394 data: 0.0033 max mem: 22446 +eval (validation): [7] Total time: 0:00:24 (0.3955 s / it) +cv: [7] best hparam: (5.1, 1.0) (034) ('034_lr5.1e+00_wd1.0e+00') loss: 0.043 acc: 0.991 f1: 0.990 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [8] [ 0/400] eta: 0:23:01 lr: nan time: 3.4542 data: 3.0962 max mem: 22446 +train: [8] [ 20/400] eta: 0:03:49 lr: 0.000270 loss: 0.1565 (0.1856) grad: 0.1579 (0.1779) time: 0.4609 data: 0.0031 max mem: 22446 +train: [8] [ 40/400] eta: 0:03:10 lr: 0.000270 loss: 0.1720 (0.2251) grad: 0.1813 (0.1913) time: 0.4511 data: 0.0028 max mem: 22446 +train: [8] [ 60/400] eta: 0:02:52 lr: 0.000269 loss: 0.1710 (0.2151) grad: 0.1997 (0.1912) time: 0.4646 data: 0.0035 max mem: 22446 +train: [8] [ 80/400] eta: 0:02:40 lr: 0.000268 loss: 0.1710 (0.2173) grad: 0.1964 (0.1954) time: 0.4791 data: 0.0034 max mem: 22446 +train: [8] [100/400] eta: 0:02:27 lr: 0.000267 loss: 0.2041 (0.2228) grad: 0.1964 (0.2024) time: 0.4526 data: 0.0034 max mem: 22446 +train: [8] [120/400] eta: 0:02:15 lr: 0.000266 loss: 0.2425 (0.2468) grad: 0.2205 (0.2153) time: 0.4564 data: 0.0032 max mem: 22446 +train: [8] [140/400] eta: 0:02:05 lr: 0.000265 loss: 0.2527 (0.2492) grad: 0.2281 (0.2186) time: 0.4722 data: 0.0035 max mem: 22446 +train: [8] [160/400] eta: 0:01:55 lr: 0.000264 loss: 0.1852 (0.2437) grad: 0.1870 (0.2175) time: 0.4599 data: 0.0034 max mem: 22446 +train: [8] [180/400] eta: 0:01:45 lr: 0.000263 loss: 0.1772 (0.2452) grad: 0.1839 (0.2160) time: 0.4510 data: 0.0032 max mem: 22446 +train: [8] [200/400] eta: 0:01:34 lr: 0.000262 loss: 0.1691 (0.2399) grad: 0.1913 (0.2151) time: 0.4514 data: 0.0032 max mem: 22446 +train: [8] [220/400] eta: 0:01:25 lr: 0.000260 loss: 0.1937 (0.2388) grad: 0.1774 (0.2139) time: 0.4564 data: 0.0029 max mem: 22446 +train: [8] [240/400] eta: 0:01:15 lr: 0.000259 loss: 0.2168 (0.2404) grad: 0.2109 (0.2166) time: 0.4561 data: 0.0033 max mem: 22446 +train: [8] [260/400] eta: 0:01:05 lr: 0.000258 loss: 0.1999 (0.2405) grad: 0.2162 (0.2181) time: 0.4543 data: 0.0032 max mem: 22446 +train: [8] [280/400] eta: 0:00:56 lr: 0.000257 loss: 0.1766 (0.2417) grad: 0.2332 (0.2189) time: 0.4568 data: 0.0032 max mem: 22446 +train: [8] [300/400] eta: 0:00:47 lr: 0.000256 loss: 0.2268 (0.2520) grad: 0.2472 (0.2235) time: 0.6096 data: 0.1672 max mem: 22446 +train: [8] [320/400] eta: 0:00:38 lr: 0.000255 loss: 0.2224 (0.2507) grad: 0.2051 (0.2217) time: 0.4566 data: 0.0036 max mem: 22446 +train: [8] [340/400] eta: 0:00:28 lr: 0.000254 loss: 0.2076 (0.2494) grad: 0.1792 (0.2200) time: 0.4494 data: 0.0028 max mem: 22446 +train: [8] [360/400] eta: 0:00:19 lr: 0.000253 loss: 0.1470 (0.2427) grad: 0.1598 (0.2161) time: 0.4673 data: 0.0032 max mem: 22446 +train: [8] [380/400] eta: 0:00:09 lr: 0.000252 loss: 0.1309 (0.2386) grad: 0.1625 (0.2133) time: 0.4525 data: 0.0034 max mem: 22446 +train: [8] [399/400] eta: 0:00:00 lr: 0.000250 loss: 0.1516 (0.2358) grad: 0.1712 (0.2107) time: 0.4581 data: 0.0035 max mem: 22446 +train: [8] Total time: 0:03:09 (0.4736 s / it) +train: [8] Summary: lr: 0.000250 loss: 0.1516 (0.2358) grad: 0.1712 (0.2107) +eval (validation): [8] [ 0/63] eta: 0:03:16 time: 3.1166 data: 2.8437 max mem: 22446 +eval (validation): [8] [20/63] eta: 0:00:21 time: 0.3729 data: 0.0041 max mem: 22446 +eval (validation): [8] [40/63] eta: 0:00:09 time: 0.3255 data: 0.0033 max mem: 22446 +eval (validation): [8] [60/63] eta: 0:00:01 time: 0.3176 data: 0.0031 max mem: 22446 +eval (validation): [8] [62/63] eta: 0:00:00 time: 0.3167 data: 0.0031 max mem: 22446 +eval (validation): [8] Total time: 0:00:24 (0.3871 s / it) +cv: [8] best hparam: (5.1, 1.0) (034) ('034_lr5.1e+00_wd1.0e+00') loss: 0.038 acc: 0.992 f1: 0.991 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [9] [ 0/400] eta: 0:22:43 lr: nan time: 3.4094 data: 3.0092 max mem: 22446 +train: [9] [ 20/400] eta: 0:04:00 lr: 0.000249 loss: 0.1883 (0.2379) grad: 0.1543 (0.1917) time: 0.4950 data: 0.0223 max mem: 22446 +train: [9] [ 40/400] eta: 0:03:14 lr: 0.000248 loss: 0.1724 (0.2111) grad: 0.1543 (0.1786) time: 0.4445 data: 0.0030 max mem: 22446 +train: [9] [ 60/400] eta: 0:02:54 lr: 0.000247 loss: 0.1555 (0.1908) grad: 0.1625 (0.1706) time: 0.4532 data: 0.0033 max mem: 22446 +train: [9] [ 80/400] eta: 0:02:40 lr: 0.000246 loss: 0.1298 (0.1845) grad: 0.1523 (0.1648) time: 0.4643 data: 0.0036 max mem: 22446 +train: [9] [100/400] eta: 0:02:27 lr: 0.000244 loss: 0.1224 (0.1764) grad: 0.1331 (0.1630) time: 0.4580 data: 0.0034 max mem: 22446 +train: [9] [120/400] eta: 0:02:15 lr: 0.000243 loss: 0.1382 (0.1805) grad: 0.1509 (0.1646) time: 0.4358 data: 0.0032 max mem: 22446 +train: [9] [140/400] eta: 0:02:05 lr: 0.000242 loss: 0.1559 (0.1770) grad: 0.1511 (0.1627) time: 0.4774 data: 0.0034 max mem: 22446 +train: [9] [160/400] eta: 0:01:54 lr: 0.000241 loss: 0.1328 (0.1744) grad: 0.1511 (0.1635) time: 0.4474 data: 0.0034 max mem: 22446 +train: [9] [180/400] eta: 0:01:44 lr: 0.000240 loss: 0.1280 (0.1715) grad: 0.1555 (0.1634) time: 0.4476 data: 0.0034 max mem: 22446 +train: [9] [200/400] eta: 0:01:34 lr: 0.000238 loss: 0.1335 (0.1737) grad: 0.1815 (0.1662) time: 0.4560 data: 0.0034 max mem: 22446 +train: [9] [220/400] eta: 0:01:24 lr: 0.000237 loss: 0.1399 (0.1730) grad: 0.1755 (0.1643) time: 0.4554 data: 0.0034 max mem: 22446 +train: [9] [240/400] eta: 0:01:15 lr: 0.000236 loss: 0.1271 (0.1692) grad: 0.1347 (0.1621) time: 0.4551 data: 0.0033 max mem: 22446 +train: [9] [260/400] eta: 0:01:05 lr: 0.000234 loss: 0.1227 (0.1689) grad: 0.1425 (0.1620) time: 0.4694 data: 0.0033 max mem: 22446 +train: [9] [280/400] eta: 0:00:56 lr: 0.000233 loss: 0.1567 (0.1688) grad: 0.1410 (0.1608) time: 0.4542 data: 0.0034 max mem: 22446 +train: [9] [300/400] eta: 0:00:47 lr: 0.000232 loss: 0.1180 (0.1676) grad: 0.1299 (0.1610) time: 0.6018 data: 0.1691 max mem: 22446 +train: [9] [320/400] eta: 0:00:38 lr: 0.000230 loss: 0.1034 (0.1637) grad: 0.1350 (0.1603) time: 0.4590 data: 0.0032 max mem: 22446 +train: [9] [340/400] eta: 0:00:28 lr: 0.000229 loss: 0.1110 (0.1642) grad: 0.1669 (0.1611) time: 0.4424 data: 0.0033 max mem: 22446 +train: [9] [360/400] eta: 0:00:18 lr: 0.000228 loss: 0.1349 (0.1630) grad: 0.1616 (0.1603) time: 0.4598 data: 0.0034 max mem: 22446 +train: [9] [380/400] eta: 0:00:09 lr: 0.000226 loss: 0.1163 (0.1610) grad: 0.1307 (0.1591) time: 0.4615 data: 0.0034 max mem: 22446 +train: [9] [399/400] eta: 0:00:00 lr: 0.000225 loss: 0.1164 (0.1592) grad: 0.0975 (0.1565) time: 0.4452 data: 0.0034 max mem: 22446 +train: [9] Total time: 0:03:08 (0.4719 s / it) +train: [9] Summary: lr: 0.000225 loss: 0.1164 (0.1592) grad: 0.0975 (0.1565) +eval (validation): [9] [ 0/63] eta: 0:03:16 time: 3.1256 data: 2.9027 max mem: 22446 +eval (validation): [9] [20/63] eta: 0:00:20 time: 0.3378 data: 0.0032 max mem: 22446 +eval (validation): [9] [40/63] eta: 0:00:09 time: 0.3548 data: 0.0028 max mem: 22446 +eval (validation): [9] [60/63] eta: 0:00:01 time: 0.3273 data: 0.0031 max mem: 22446 +eval (validation): [9] [62/63] eta: 0:00:00 time: 0.3265 data: 0.0031 max mem: 22446 +eval (validation): [9] Total time: 0:00:24 (0.3887 s / it) +cv: [9] best hparam: (5.1, 1.0) (034) ('034_lr5.1e+00_wd1.0e+00') loss: 0.041 acc: 0.992 f1: 0.991 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [10] [ 0/400] eta: 0:22:27 lr: nan time: 3.3695 data: 3.0225 max mem: 22446 +train: [10] [ 20/400] eta: 0:03:51 lr: 0.000224 loss: 0.1418 (0.1499) grad: 0.1168 (0.1329) time: 0.4721 data: 0.0030 max mem: 22446 +train: [10] [ 40/400] eta: 0:03:12 lr: 0.000222 loss: 0.1259 (0.1301) grad: 0.1176 (0.1356) time: 0.4561 data: 0.0034 max mem: 22446 +train: [10] [ 60/400] eta: 0:02:52 lr: 0.000221 loss: 0.1120 (0.1269) grad: 0.1181 (0.1294) time: 0.4549 data: 0.0034 max mem: 22446 +train: [10] [ 80/400] eta: 0:02:39 lr: 0.000220 loss: 0.1158 (0.1303) grad: 0.1211 (0.1318) time: 0.4623 data: 0.0034 max mem: 22446 +train: [10] [100/400] eta: 0:02:26 lr: 0.000218 loss: 0.1042 (0.1284) grad: 0.1178 (0.1290) time: 0.4568 data: 0.0034 max mem: 22446 +train: [10] [120/400] eta: 0:02:14 lr: 0.000217 loss: 0.1042 (0.1270) grad: 0.1117 (0.1288) time: 0.4410 data: 0.0032 max mem: 22446 +train: [10] [140/400] eta: 0:02:04 lr: 0.000215 loss: 0.1184 (0.1298) grad: 0.1157 (0.1293) time: 0.4594 data: 0.0033 max mem: 22446 +train: [10] [160/400] eta: 0:01:54 lr: 0.000214 loss: 0.1120 (0.1274) grad: 0.1123 (0.1272) time: 0.4662 data: 0.0037 max mem: 22446 +train: [10] [180/400] eta: 0:01:44 lr: 0.000213 loss: 0.1040 (0.1263) grad: 0.1110 (0.1262) time: 0.4617 data: 0.0035 max mem: 22446 +train: [10] [200/400] eta: 0:01:34 lr: 0.000211 loss: 0.1161 (0.1263) grad: 0.1210 (0.1268) time: 0.4515 data: 0.0036 max mem: 22446 +train: [10] [220/400] eta: 0:01:24 lr: 0.000210 loss: 0.1163 (0.1260) grad: 0.1213 (0.1271) time: 0.4493 data: 0.0034 max mem: 22446 +train: [10] [240/400] eta: 0:01:15 lr: 0.000208 loss: 0.1066 (0.1242) grad: 0.1218 (0.1272) time: 0.4504 data: 0.0033 max mem: 22446 +train: [10] [260/400] eta: 0:01:05 lr: 0.000207 loss: 0.1007 (0.1240) grad: 0.1402 (0.1288) time: 0.4601 data: 0.0035 max mem: 22446 +train: [10] [280/400] eta: 0:00:56 lr: 0.000205 loss: 0.1023 (0.1222) grad: 0.1288 (0.1277) time: 0.4610 data: 0.0035 max mem: 22446 +train: [10] [300/400] eta: 0:00:47 lr: 0.000204 loss: 0.1001 (0.1219) grad: 0.1184 (0.1263) time: 0.6200 data: 0.1706 max mem: 22446 +train: [10] [320/400] eta: 0:00:38 lr: 0.000202 loss: 0.0987 (0.1213) grad: 0.1151 (0.1253) time: 0.4609 data: 0.0039 max mem: 22446 +train: [10] [340/400] eta: 0:00:28 lr: 0.000201 loss: 0.0991 (0.1199) grad: 0.1025 (0.1238) time: 0.4544 data: 0.0034 max mem: 22446 +train: [10] [360/400] eta: 0:00:18 lr: 0.000199 loss: 0.0914 (0.1194) grad: 0.1047 (0.1228) time: 0.4638 data: 0.0036 max mem: 22446 +train: [10] [380/400] eta: 0:00:09 lr: 0.000198 loss: 0.0900 (0.1191) grad: 0.1033 (0.1211) time: 0.4572 data: 0.0036 max mem: 22446 +train: [10] [399/400] eta: 0:00:00 lr: 0.000196 loss: 0.0928 (0.1185) grad: 0.0808 (0.1188) time: 0.4519 data: 0.0036 max mem: 22446 +train: [10] Total time: 0:03:09 (0.4732 s / it) +train: [10] Summary: lr: 0.000196 loss: 0.0928 (0.1185) grad: 0.0808 (0.1188) +eval (validation): [10] [ 0/63] eta: 0:03:20 time: 3.1747 data: 2.9472 max mem: 22446 +eval (validation): [10] [20/63] eta: 0:00:20 time: 0.3391 data: 0.0080 max mem: 22446 +eval (validation): [10] [40/63] eta: 0:00:09 time: 0.3667 data: 0.0030 max mem: 22446 +eval (validation): [10] [60/63] eta: 0:00:01 time: 0.3199 data: 0.0033 max mem: 22446 +eval (validation): [10] [62/63] eta: 0:00:00 time: 0.3187 data: 0.0033 max mem: 22446 +eval (validation): [10] Total time: 0:00:24 (0.3909 s / it) +cv: [10] best hparam: (3.1, 1.0) (031) ('031_lr3.1e+00_wd1.0e+00') loss: 0.036 acc: 0.991 f1: 0.990 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [11] [ 0/400] eta: 0:22:58 lr: nan time: 3.4473 data: 3.0618 max mem: 22446 +train: [11] [ 20/400] eta: 0:03:52 lr: 0.000195 loss: 0.0895 (0.1182) grad: 0.0963 (0.1027) time: 0.4700 data: 0.0024 max mem: 22446 +train: [11] [ 40/400] eta: 0:03:12 lr: 0.000193 loss: 0.1008 (0.1122) grad: 0.0963 (0.0984) time: 0.4530 data: 0.0033 max mem: 22446 +train: [11] [ 60/400] eta: 0:02:52 lr: 0.000192 loss: 0.0967 (0.1079) grad: 0.0824 (0.0880) time: 0.4528 data: 0.0033 max mem: 22446 +train: [11] [ 80/400] eta: 0:02:39 lr: 0.000190 loss: 0.0843 (0.1032) grad: 0.0706 (0.0871) time: 0.4647 data: 0.0033 max mem: 22446 +train: [11] [100/400] eta: 0:02:27 lr: 0.000189 loss: 0.0914 (0.1014) grad: 0.0855 (0.0869) time: 0.4685 data: 0.0034 max mem: 22446 +train: [11] [120/400] eta: 0:02:16 lr: 0.000187 loss: 0.0912 (0.1007) grad: 0.0931 (0.0908) time: 0.4598 data: 0.0033 max mem: 22446 +train: [11] [140/400] eta: 0:02:05 lr: 0.000186 loss: 0.0965 (0.1011) grad: 0.0929 (0.0917) time: 0.4698 data: 0.0034 max mem: 22446 +train: [11] [160/400] eta: 0:01:55 lr: 0.000184 loss: 0.1040 (0.1028) grad: 0.0963 (0.0934) time: 0.4666 data: 0.0035 max mem: 22446 +train: [11] [180/400] eta: 0:01:45 lr: 0.000183 loss: 0.0947 (0.1039) grad: 0.1143 (0.0949) time: 0.4582 data: 0.0033 max mem: 22446 +train: [11] [200/400] eta: 0:01:35 lr: 0.000181 loss: 0.0803 (0.1024) grad: 0.1143 (0.0963) time: 0.4502 data: 0.0034 max mem: 22446 +train: [11] [220/400] eta: 0:01:25 lr: 0.000180 loss: 0.0803 (0.1013) grad: 0.1035 (0.0958) time: 0.4449 data: 0.0033 max mem: 22446 +train: [11] [240/400] eta: 0:01:15 lr: 0.000178 loss: 0.0957 (0.1011) grad: 0.0871 (0.0954) time: 0.4499 data: 0.0035 max mem: 22446 +train: [11] [260/400] eta: 0:01:05 lr: 0.000177 loss: 0.0913 (0.1007) grad: 0.0762 (0.0940) time: 0.4692 data: 0.0035 max mem: 22446 +train: [11] [280/400] eta: 0:00:56 lr: 0.000175 loss: 0.0960 (0.1014) grad: 0.0737 (0.0940) time: 0.4550 data: 0.0035 max mem: 22446 +train: [11] [300/400] eta: 0:00:47 lr: 0.000174 loss: 0.0942 (0.1006) grad: 0.0755 (0.0933) time: 0.6039 data: 0.1671 max mem: 22446 +train: [11] [320/400] eta: 0:00:38 lr: 0.000172 loss: 0.0884 (0.1006) grad: 0.0839 (0.0934) time: 0.4532 data: 0.0027 max mem: 22446 +train: [11] [340/400] eta: 0:00:28 lr: 0.000170 loss: 0.0875 (0.0995) grad: 0.0802 (0.0919) time: 0.4500 data: 0.0034 max mem: 22446 +train: [11] [360/400] eta: 0:00:19 lr: 0.000169 loss: 0.0828 (0.0989) grad: 0.0691 (0.0907) time: 0.4627 data: 0.0032 max mem: 22446 +train: [11] [380/400] eta: 0:00:09 lr: 0.000167 loss: 0.0828 (0.0978) grad: 0.0691 (0.0897) time: 0.4586 data: 0.0033 max mem: 22446 +train: [11] [399/400] eta: 0:00:00 lr: 0.000166 loss: 0.0819 (0.0977) grad: 0.0741 (0.0894) time: 0.4480 data: 0.0034 max mem: 22446 +train: [11] Total time: 0:03:09 (0.4732 s / it) +train: [11] Summary: lr: 0.000166 loss: 0.0819 (0.0977) grad: 0.0741 (0.0894) +eval (validation): [11] [ 0/63] eta: 0:03:22 time: 3.2215 data: 2.9502 max mem: 22446 +eval (validation): [11] [20/63] eta: 0:00:20 time: 0.3427 data: 0.0038 max mem: 22446 +eval (validation): [11] [40/63] eta: 0:00:09 time: 0.3490 data: 0.0028 max mem: 22446 +eval (validation): [11] [60/63] eta: 0:00:01 time: 0.3280 data: 0.0032 max mem: 22446 +eval (validation): [11] [62/63] eta: 0:00:00 time: 0.3280 data: 0.0031 max mem: 22446 +eval (validation): [11] Total time: 0:00:24 (0.3901 s / it) +cv: [11] best hparam: (3.1, 1.0) (031) ('031_lr3.1e+00_wd1.0e+00') loss: 0.037 acc: 0.992 f1: 0.990 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [12] [ 0/400] eta: 0:23:23 lr: nan time: 3.5076 data: 3.1185 max mem: 22446 +train: [12] [ 20/400] eta: 0:04:04 lr: 0.000164 loss: 0.0802 (0.0836) grad: 0.0620 (0.0636) time: 0.5011 data: 0.0031 max mem: 22446 +train: [12] [ 40/400] eta: 0:03:16 lr: 0.000163 loss: 0.0778 (0.0815) grad: 0.0639 (0.0647) time: 0.4401 data: 0.0030 max mem: 22446 +train: [12] [ 60/400] eta: 0:02:54 lr: 0.000161 loss: 0.0732 (0.0781) grad: 0.0564 (0.0594) time: 0.4503 data: 0.0033 max mem: 22446 +train: [12] [ 80/400] eta: 0:02:39 lr: 0.000160 loss: 0.0728 (0.0787) grad: 0.0449 (0.0585) time: 0.4530 data: 0.0034 max mem: 22446 +train: [12] [100/400] eta: 0:02:27 lr: 0.000158 loss: 0.0838 (0.0798) grad: 0.0448 (0.0576) time: 0.4628 data: 0.0035 max mem: 22446 +train: [12] [120/400] eta: 0:02:16 lr: 0.000156 loss: 0.0855 (0.0834) grad: 0.0654 (0.0632) time: 0.4656 data: 0.0034 max mem: 22446 +train: [12] [140/400] eta: 0:02:05 lr: 0.000155 loss: 0.0829 (0.0831) grad: 0.0728 (0.0629) time: 0.4450 data: 0.0033 max mem: 22446 +train: [12] [160/400] eta: 0:01:55 lr: 0.000153 loss: 0.0832 (0.0833) grad: 0.0540 (0.0631) time: 0.4817 data: 0.0035 max mem: 22446 +train: [12] [180/400] eta: 0:01:45 lr: 0.000152 loss: 0.0792 (0.0827) grad: 0.0540 (0.0625) time: 0.4575 data: 0.0034 max mem: 22446 +train: [12] [200/400] eta: 0:01:35 lr: 0.000150 loss: 0.0697 (0.0819) grad: 0.0545 (0.0623) time: 0.4454 data: 0.0033 max mem: 22446 +train: [12] [220/400] eta: 0:01:25 lr: 0.000149 loss: 0.0709 (0.0830) grad: 0.0660 (0.0632) time: 0.4564 data: 0.0035 max mem: 22446 +train: [12] [240/400] eta: 0:01:15 lr: 0.000147 loss: 0.0836 (0.0834) grad: 0.0675 (0.0642) time: 0.4587 data: 0.0034 max mem: 22446 +train: [12] [260/400] eta: 0:01:05 lr: 0.000145 loss: 0.0821 (0.0833) grad: 0.0686 (0.0644) time: 0.4536 data: 0.0033 max mem: 22446 +train: [12] [280/400] eta: 0:00:56 lr: 0.000144 loss: 0.0767 (0.0827) grad: 0.0611 (0.0635) time: 0.4862 data: 0.0038 max mem: 22446 +train: [12] [300/400] eta: 0:00:48 lr: 0.000142 loss: 0.0781 (0.0828) grad: 0.0471 (0.0631) time: 0.6202 data: 0.1789 max mem: 22446 +train: [12] [320/400] eta: 0:00:38 lr: 0.000141 loss: 0.0782 (0.0823) grad: 0.0471 (0.0629) time: 0.4633 data: 0.0028 max mem: 22446 +train: [12] [340/400] eta: 0:00:28 lr: 0.000139 loss: 0.0782 (0.0822) grad: 0.0556 (0.0625) time: 0.4632 data: 0.0035 max mem: 22446 +train: [12] [360/400] eta: 0:00:19 lr: 0.000138 loss: 0.0797 (0.0818) grad: 0.0469 (0.0620) time: 0.4603 data: 0.0034 max mem: 22446 +train: [12] [380/400] eta: 0:00:09 lr: 0.000136 loss: 0.0720 (0.0819) grad: 0.0483 (0.0620) time: 0.4636 data: 0.0033 max mem: 22446 +train: [12] [399/400] eta: 0:00:00 lr: 0.000134 loss: 0.0714 (0.0816) grad: 0.0483 (0.0612) time: 0.4531 data: 0.0034 max mem: 22446 +train: [12] Total time: 0:03:10 (0.4770 s / it) +train: [12] Summary: lr: 0.000134 loss: 0.0714 (0.0816) grad: 0.0483 (0.0612) +eval (validation): [12] [ 0/63] eta: 0:03:24 time: 3.2504 data: 2.9593 max mem: 22446 +eval (validation): [12] [20/63] eta: 0:00:21 time: 0.3684 data: 0.0040 max mem: 22446 +eval (validation): [12] [40/63] eta: 0:00:09 time: 0.3472 data: 0.0033 max mem: 22446 +eval (validation): [12] [60/63] eta: 0:00:01 time: 0.3298 data: 0.0034 max mem: 22446 +eval (validation): [12] [62/63] eta: 0:00:00 time: 0.3288 data: 0.0033 max mem: 22446 +eval (validation): [12] Total time: 0:00:25 (0.3994 s / it) +cv: [12] best hparam: (6, 1.0) (035) ('035_lr6.0e+00_wd1.0e+00') loss: 0.048 acc: 0.992 f1: 0.991 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [13] [ 0/400] eta: 0:22:39 lr: nan time: 3.3994 data: 3.0553 max mem: 22446 +train: [13] [ 20/400] eta: 0:03:45 lr: 0.000133 loss: 0.0801 (0.0772) grad: 0.0616 (0.0565) time: 0.4533 data: 0.0028 max mem: 22446 +train: [13] [ 40/400] eta: 0:03:09 lr: 0.000131 loss: 0.0670 (0.0717) grad: 0.0429 (0.0550) time: 0.4534 data: 0.0031 max mem: 22446 +train: [13] [ 60/400] eta: 0:02:51 lr: 0.000130 loss: 0.0640 (0.0706) grad: 0.0409 (0.0536) time: 0.4583 data: 0.0034 max mem: 22446 +train: [13] [ 80/400] eta: 0:02:37 lr: 0.000128 loss: 0.0656 (0.0700) grad: 0.0409 (0.0509) time: 0.4594 data: 0.0034 max mem: 22446 +train: [13] [100/400] eta: 0:02:25 lr: 0.000127 loss: 0.0677 (0.0704) grad: 0.0399 (0.0500) time: 0.4631 data: 0.0034 max mem: 22446 +train: [13] [120/400] eta: 0:02:14 lr: 0.000125 loss: 0.0673 (0.0702) grad: 0.0426 (0.0503) time: 0.4519 data: 0.0034 max mem: 22446 +train: [13] [140/400] eta: 0:02:03 lr: 0.000124 loss: 0.0657 (0.0709) grad: 0.0434 (0.0497) time: 0.4517 data: 0.0033 max mem: 22446 +train: [13] [160/400] eta: 0:01:54 lr: 0.000122 loss: 0.0640 (0.0707) grad: 0.0424 (0.0492) time: 0.4779 data: 0.0035 max mem: 22446 +train: [13] [180/400] eta: 0:01:45 lr: 0.000120 loss: 0.0719 (0.0717) grad: 0.0426 (0.0499) time: 0.4834 data: 0.0029 max mem: 22446 +train: [13] [200/400] eta: 0:01:35 lr: 0.000119 loss: 0.0769 (0.0724) grad: 0.0455 (0.0502) time: 0.4572 data: 0.0034 max mem: 22446 +train: [13] [220/400] eta: 0:01:25 lr: 0.000117 loss: 0.0723 (0.0726) grad: 0.0455 (0.0502) time: 0.4600 data: 0.0035 max mem: 22446 +train: [13] [240/400] eta: 0:01:15 lr: 0.000116 loss: 0.0692 (0.0725) grad: 0.0514 (0.0513) time: 0.4654 data: 0.0035 max mem: 22446 +train: [13] [260/400] eta: 0:01:06 lr: 0.000114 loss: 0.0668 (0.0724) grad: 0.0548 (0.0514) time: 0.4565 data: 0.0034 max mem: 22446 +train: [13] [280/400] eta: 0:00:56 lr: 0.000113 loss: 0.0629 (0.0721) grad: 0.0468 (0.0513) time: 0.4689 data: 0.0034 max mem: 22446 +train: [13] [300/400] eta: 0:00:48 lr: 0.000111 loss: 0.0676 (0.0720) grad: 0.0461 (0.0515) time: 0.6265 data: 0.1689 max mem: 22446 +train: [13] [320/400] eta: 0:00:38 lr: 0.000110 loss: 0.0676 (0.0719) grad: 0.0441 (0.0510) time: 0.4722 data: 0.0028 max mem: 22446 +train: [13] [340/400] eta: 0:00:28 lr: 0.000108 loss: 0.0660 (0.0715) grad: 0.0392 (0.0504) time: 0.4440 data: 0.0034 max mem: 22446 +train: [13] [360/400] eta: 0:00:19 lr: 0.000107 loss: 0.0602 (0.0711) grad: 0.0404 (0.0502) time: 0.4671 data: 0.0035 max mem: 22446 +train: [13] [380/400] eta: 0:00:09 lr: 0.000105 loss: 0.0634 (0.0710) grad: 0.0412 (0.0498) time: 0.4495 data: 0.0035 max mem: 22446 +train: [13] [399/400] eta: 0:00:00 lr: 0.000104 loss: 0.0685 (0.0712) grad: 0.0412 (0.0497) time: 0.4505 data: 0.0034 max mem: 22446 +train: [13] Total time: 0:03:10 (0.4762 s / it) +train: [13] Summary: lr: 0.000104 loss: 0.0685 (0.0712) grad: 0.0412 (0.0497) +eval (validation): [13] [ 0/63] eta: 0:03:22 time: 3.2155 data: 2.9425 max mem: 22446 +eval (validation): [13] [20/63] eta: 0:00:21 time: 0.3574 data: 0.0045 max mem: 22446 +eval (validation): [13] [40/63] eta: 0:00:09 time: 0.3482 data: 0.0032 max mem: 22446 +eval (validation): [13] [60/63] eta: 0:00:01 time: 0.3284 data: 0.0032 max mem: 22446 +eval (validation): [13] [62/63] eta: 0:00:00 time: 0.3270 data: 0.0032 max mem: 22446 +eval (validation): [13] Total time: 0:00:24 (0.3938 s / it) +cv: [13] best hparam: (6, 1.0) (035) ('035_lr6.0e+00_wd1.0e+00') loss: 0.048 acc: 0.992 f1: 0.991 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [14] [ 0/400] eta: 0:22:25 lr: nan time: 3.3632 data: 3.0206 max mem: 22446 +train: [14] [ 20/400] eta: 0:03:50 lr: 0.000102 loss: 0.0639 (0.0701) grad: 0.0385 (0.0382) time: 0.4698 data: 0.0037 max mem: 22446 +train: [14] [ 40/400] eta: 0:03:11 lr: 0.000101 loss: 0.0664 (0.0693) grad: 0.0391 (0.0411) time: 0.4529 data: 0.0032 max mem: 22446 +train: [14] [ 60/400] eta: 0:02:53 lr: 0.000099 loss: 0.0652 (0.0675) grad: 0.0391 (0.0408) time: 0.4669 data: 0.0034 max mem: 22446 +train: [14] [ 80/400] eta: 0:02:38 lr: 0.000098 loss: 0.0586 (0.0671) grad: 0.0407 (0.0426) time: 0.4530 data: 0.0035 max mem: 22446 +train: [14] [100/400] eta: 0:02:28 lr: 0.000096 loss: 0.0575 (0.0666) grad: 0.0400 (0.0416) time: 0.4881 data: 0.0037 max mem: 22446 +train: [14] [120/400] eta: 0:02:16 lr: 0.000095 loss: 0.0636 (0.0683) grad: 0.0390 (0.0418) time: 0.4602 data: 0.0034 max mem: 22446 +train: [14] [140/400] eta: 0:02:05 lr: 0.000093 loss: 0.0705 (0.0692) grad: 0.0435 (0.0432) time: 0.4526 data: 0.0032 max mem: 22446 +train: [14] [160/400] eta: 0:01:55 lr: 0.000092 loss: 0.0590 (0.0685) grad: 0.0391 (0.0429) time: 0.4715 data: 0.0035 max mem: 22446 +train: [14] [180/400] eta: 0:01:45 lr: 0.000090 loss: 0.0543 (0.0674) grad: 0.0348 (0.0422) time: 0.4643 data: 0.0034 max mem: 22446 +train: [14] [200/400] eta: 0:01:35 lr: 0.000089 loss: 0.0584 (0.0679) grad: 0.0365 (0.0422) time: 0.4545 data: 0.0034 max mem: 22446 +train: [14] [220/400] eta: 0:01:25 lr: 0.000088 loss: 0.0669 (0.0677) grad: 0.0398 (0.0423) time: 0.4635 data: 0.0033 max mem: 22446 +train: [14] [240/400] eta: 0:01:16 lr: 0.000086 loss: 0.0646 (0.0674) grad: 0.0404 (0.0423) time: 0.4591 data: 0.0033 max mem: 22446 +train: [14] [260/400] eta: 0:01:06 lr: 0.000085 loss: 0.0577 (0.0666) grad: 0.0420 (0.0421) time: 0.4593 data: 0.0034 max mem: 22446 +train: [14] [280/400] eta: 0:00:56 lr: 0.000083 loss: 0.0641 (0.0674) grad: 0.0413 (0.0420) time: 0.4729 data: 0.0034 max mem: 22446 +train: [14] [300/400] eta: 0:00:48 lr: 0.000082 loss: 0.0698 (0.0678) grad: 0.0413 (0.0424) time: 0.6193 data: 0.1755 max mem: 22446 +train: [14] [320/400] eta: 0:00:38 lr: 0.000081 loss: 0.0625 (0.0675) grad: 0.0388 (0.0423) time: 0.4635 data: 0.0037 max mem: 22446 +train: [14] [340/400] eta: 0:00:28 lr: 0.000079 loss: 0.0682 (0.0679) grad: 0.0379 (0.0420) time: 0.4618 data: 0.0030 max mem: 22446 +train: [14] [360/400] eta: 0:00:19 lr: 0.000078 loss: 0.0723 (0.0681) grad: 0.0398 (0.0420) time: 0.4643 data: 0.0035 max mem: 22446 +train: [14] [380/400] eta: 0:00:09 lr: 0.000076 loss: 0.0723 (0.0682) grad: 0.0419 (0.0420) time: 0.4550 data: 0.0037 max mem: 22446 +train: [14] [399/400] eta: 0:00:00 lr: 0.000075 loss: 0.0626 (0.0681) grad: 0.0383 (0.0418) time: 0.4491 data: 0.0034 max mem: 22446 +train: [14] Total time: 0:03:11 (0.4776 s / it) +train: [14] Summary: lr: 0.000075 loss: 0.0626 (0.0681) grad: 0.0383 (0.0418) +eval (validation): [14] [ 0/63] eta: 0:03:28 time: 3.3143 data: 3.0740 max mem: 22446 +eval (validation): [14] [20/63] eta: 0:00:21 time: 0.3649 data: 0.0041 max mem: 22446 +eval (validation): [14] [40/63] eta: 0:00:09 time: 0.3485 data: 0.0028 max mem: 22446 +eval (validation): [14] [60/63] eta: 0:00:01 time: 0.3292 data: 0.0031 max mem: 22446 +eval (validation): [14] [62/63] eta: 0:00:00 time: 0.3277 data: 0.0031 max mem: 22446 +eval (validation): [14] Total time: 0:00:25 (0.3986 s / it) +cv: [14] best hparam: (6, 1.0) (035) ('035_lr6.0e+00_wd1.0e+00') loss: 0.047 acc: 0.992 f1: 0.991 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [15] [ 0/400] eta: 0:22:31 lr: nan time: 3.3781 data: 2.9929 max mem: 22446 +train: [15] [ 20/400] eta: 0:03:50 lr: 0.000074 loss: 0.0627 (0.0689) grad: 0.0370 (0.0413) time: 0.4676 data: 0.0037 max mem: 22446 +train: [15] [ 40/400] eta: 0:03:10 lr: 0.000072 loss: 0.0666 (0.0699) grad: 0.0407 (0.0423) time: 0.4461 data: 0.0033 max mem: 22446 +train: [15] [ 60/400] eta: 0:02:53 lr: 0.000071 loss: 0.0601 (0.0653) grad: 0.0382 (0.0412) time: 0.4709 data: 0.0034 max mem: 22446 +train: [15] [ 80/400] eta: 0:02:39 lr: 0.000070 loss: 0.0563 (0.0647) grad: 0.0344 (0.0399) time: 0.4680 data: 0.0032 max mem: 22446 +train: [15] [100/400] eta: 0:02:27 lr: 0.000068 loss: 0.0586 (0.0633) grad: 0.0344 (0.0398) time: 0.4574 data: 0.0034 max mem: 22446 +train: [15] [120/400] eta: 0:02:15 lr: 0.000067 loss: 0.0573 (0.0630) grad: 0.0366 (0.0396) time: 0.4508 data: 0.0034 max mem: 22446 +train: [15] [140/400] eta: 0:02:04 lr: 0.000066 loss: 0.0626 (0.0630) grad: 0.0361 (0.0393) time: 0.4467 data: 0.0032 max mem: 22446 +train: [15] [160/400] eta: 0:01:54 lr: 0.000064 loss: 0.0619 (0.0627) grad: 0.0368 (0.0394) time: 0.4695 data: 0.0036 max mem: 22446 +train: [15] [180/400] eta: 0:01:44 lr: 0.000063 loss: 0.0569 (0.0632) grad: 0.0371 (0.0389) time: 0.4586 data: 0.0033 max mem: 22446 +train: [15] [200/400] eta: 0:01:34 lr: 0.000062 loss: 0.0693 (0.0641) grad: 0.0356 (0.0392) time: 0.4569 data: 0.0034 max mem: 22446 +train: [15] [220/400] eta: 0:01:25 lr: 0.000061 loss: 0.0621 (0.0635) grad: 0.0367 (0.0392) time: 0.4603 data: 0.0034 max mem: 22446 +train: [15] [240/400] eta: 0:01:15 lr: 0.000059 loss: 0.0599 (0.0632) grad: 0.0371 (0.0392) time: 0.4640 data: 0.0033 max mem: 22446 +train: [15] [260/400] eta: 0:01:06 lr: 0.000058 loss: 0.0576 (0.0633) grad: 0.0371 (0.0391) time: 0.4666 data: 0.0033 max mem: 22446 +train: [15] [280/400] eta: 0:00:56 lr: 0.000057 loss: 0.0545 (0.0633) grad: 0.0357 (0.0391) time: 0.4858 data: 0.0037 max mem: 22446 +train: [15] [300/400] eta: 0:00:48 lr: 0.000056 loss: 0.0592 (0.0634) grad: 0.0356 (0.0391) time: 0.6393 data: 0.1707 max mem: 22446 +train: [15] [320/400] eta: 0:00:38 lr: 0.000054 loss: 0.0579 (0.0631) grad: 0.0377 (0.0392) time: 0.4572 data: 0.0027 max mem: 22446 +train: [15] [340/400] eta: 0:00:28 lr: 0.000053 loss: 0.0572 (0.0632) grad: 0.0403 (0.0394) time: 0.4564 data: 0.0034 max mem: 22446 +train: [15] [360/400] eta: 0:00:19 lr: 0.000052 loss: 0.0573 (0.0633) grad: 0.0409 (0.0394) time: 0.4633 data: 0.0035 max mem: 22446 +train: [15] [380/400] eta: 0:00:09 lr: 0.000051 loss: 0.0540 (0.0628) grad: 0.0382 (0.0392) time: 0.4609 data: 0.0036 max mem: 22446 +train: [15] [399/400] eta: 0:00:00 lr: 0.000050 loss: 0.0606 (0.0629) grad: 0.0347 (0.0390) time: 0.4542 data: 0.0034 max mem: 22446 +train: [15] Total time: 0:03:11 (0.4776 s / it) +train: [15] Summary: lr: 0.000050 loss: 0.0606 (0.0629) grad: 0.0347 (0.0390) +eval (validation): [15] [ 0/63] eta: 0:03:16 time: 3.1190 data: 2.8837 max mem: 22446 +eval (validation): [15] [20/63] eta: 0:00:21 time: 0.3596 data: 0.0033 max mem: 22446 +eval (validation): [15] [40/63] eta: 0:00:09 time: 0.3480 data: 0.0030 max mem: 22446 +eval (validation): [15] [60/63] eta: 0:00:01 time: 0.3254 data: 0.0030 max mem: 22446 +eval (validation): [15] [62/63] eta: 0:00:00 time: 0.3260 data: 0.0030 max mem: 22446 +eval (validation): [15] Total time: 0:00:24 (0.3926 s / it) +cv: [15] best hparam: (6, 1.0) (035) ('035_lr6.0e+00_wd1.0e+00') loss: 0.047 acc: 0.992 f1: 0.991 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [16] [ 0/400] eta: 0:22:22 lr: nan time: 3.3551 data: 2.9598 max mem: 22446 +train: [16] [ 20/400] eta: 0:03:43 lr: 0.000048 loss: 0.0681 (0.0687) grad: 0.0375 (0.0400) time: 0.4496 data: 0.0041 max mem: 22446 +train: [16] [ 40/400] eta: 0:03:06 lr: 0.000047 loss: 0.0630 (0.0661) grad: 0.0367 (0.0394) time: 0.4475 data: 0.0032 max mem: 22446 +train: [16] [ 60/400] eta: 0:02:50 lr: 0.000046 loss: 0.0617 (0.0643) grad: 0.0347 (0.0377) time: 0.4634 data: 0.0035 max mem: 22446 +train: [16] [ 80/400] eta: 0:02:37 lr: 0.000045 loss: 0.0617 (0.0639) grad: 0.0331 (0.0371) time: 0.4634 data: 0.0035 max mem: 22446 +train: [16] [100/400] eta: 0:02:25 lr: 0.000044 loss: 0.0592 (0.0639) grad: 0.0335 (0.0370) time: 0.4617 data: 0.0034 max mem: 22446 +train: [16] [120/400] eta: 0:02:14 lr: 0.000043 loss: 0.0552 (0.0631) grad: 0.0373 (0.0373) time: 0.4534 data: 0.0035 max mem: 22446 +train: [16] [140/400] eta: 0:02:04 lr: 0.000042 loss: 0.0570 (0.0633) grad: 0.0382 (0.0375) time: 0.4576 data: 0.0033 max mem: 22446 +train: [16] [160/400] eta: 0:01:54 lr: 0.000041 loss: 0.0585 (0.0631) grad: 0.0349 (0.0374) time: 0.4695 data: 0.0036 max mem: 22446 +train: [16] [180/400] eta: 0:01:44 lr: 0.000040 loss: 0.0630 (0.0638) grad: 0.0347 (0.0375) time: 0.4540 data: 0.0034 max mem: 22446 +train: [16] [200/400] eta: 0:01:34 lr: 0.000039 loss: 0.0643 (0.0644) grad: 0.0393 (0.0380) time: 0.4501 data: 0.0035 max mem: 22446 +train: [16] [220/400] eta: 0:01:24 lr: 0.000038 loss: 0.0603 (0.0639) grad: 0.0375 (0.0378) time: 0.4542 data: 0.0034 max mem: 22446 +train: [16] [240/400] eta: 0:01:14 lr: 0.000036 loss: 0.0520 (0.0633) grad: 0.0321 (0.0373) time: 0.4538 data: 0.0034 max mem: 22446 +train: [16] [260/400] eta: 0:01:05 lr: 0.000035 loss: 0.0596 (0.0639) grad: 0.0345 (0.0375) time: 0.4496 data: 0.0035 max mem: 22446 +train: [16] [280/400] eta: 0:00:56 lr: 0.000034 loss: 0.0585 (0.0636) grad: 0.0376 (0.0378) time: 0.4721 data: 0.0031 max mem: 22446 +train: [16] [300/400] eta: 0:00:47 lr: 0.000033 loss: 0.0570 (0.0635) grad: 0.0403 (0.0381) time: 0.6192 data: 0.1745 max mem: 22446 +train: [16] [320/400] eta: 0:00:38 lr: 0.000032 loss: 0.0594 (0.0635) grad: 0.0386 (0.0382) time: 0.4583 data: 0.0032 max mem: 22446 +train: [16] [340/400] eta: 0:00:28 lr: 0.000031 loss: 0.0625 (0.0637) grad: 0.0386 (0.0384) time: 0.4575 data: 0.0035 max mem: 22446 +train: [16] [360/400] eta: 0:00:18 lr: 0.000031 loss: 0.0587 (0.0635) grad: 0.0358 (0.0383) time: 0.4701 data: 0.0036 max mem: 22446 +train: [16] [380/400] eta: 0:00:09 lr: 0.000030 loss: 0.0542 (0.0630) grad: 0.0358 (0.0382) time: 0.4590 data: 0.0035 max mem: 22446 +train: [16] [399/400] eta: 0:00:00 lr: 0.000029 loss: 0.0534 (0.0629) grad: 0.0399 (0.0384) time: 0.4492 data: 0.0034 max mem: 22446 +train: [16] Total time: 0:03:09 (0.4732 s / it) +train: [16] Summary: lr: 0.000029 loss: 0.0534 (0.0629) grad: 0.0399 (0.0384) +eval (validation): [16] [ 0/63] eta: 0:03:20 time: 3.1820 data: 2.8979 max mem: 22446 +eval (validation): [16] [20/63] eta: 0:00:20 time: 0.3457 data: 0.0034 max mem: 22446 +eval (validation): [16] [40/63] eta: 0:00:09 time: 0.3500 data: 0.0029 max mem: 22446 +eval (validation): [16] [60/63] eta: 0:00:01 time: 0.3385 data: 0.0033 max mem: 22446 +eval (validation): [16] [62/63] eta: 0:00:00 time: 0.3374 data: 0.0032 max mem: 22446 +eval (validation): [16] Total time: 0:00:24 (0.3941 s / it) +cv: [16] best hparam: (6, 1.0) (035) ('035_lr6.0e+00_wd1.0e+00') loss: 0.047 acc: 0.992 f1: 0.991 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [17] [ 0/400] eta: 0:23:07 lr: nan time: 3.4692 data: 3.0761 max mem: 22446 +train: [17] [ 20/400] eta: 0:03:54 lr: 0.000028 loss: 0.0515 (0.0579) grad: 0.0340 (0.0357) time: 0.4747 data: 0.0025 max mem: 22446 +train: [17] [ 40/400] eta: 0:03:14 lr: 0.000027 loss: 0.0594 (0.0635) grad: 0.0343 (0.0382) time: 0.4569 data: 0.0034 max mem: 22446 +train: [17] [ 60/400] eta: 0:02:55 lr: 0.000026 loss: 0.0662 (0.0656) grad: 0.0387 (0.0377) time: 0.4678 data: 0.0035 max mem: 22446 +train: [17] [ 80/400] eta: 0:02:40 lr: 0.000025 loss: 0.0689 (0.0663) grad: 0.0362 (0.0379) time: 0.4647 data: 0.0033 max mem: 22446 +train: [17] [100/400] eta: 0:02:29 lr: 0.000024 loss: 0.0614 (0.0648) grad: 0.0358 (0.0374) time: 0.4726 data: 0.0035 max mem: 22446 +train: [17] [120/400] eta: 0:02:17 lr: 0.000023 loss: 0.0591 (0.0645) grad: 0.0341 (0.0371) time: 0.4573 data: 0.0036 max mem: 22446 +train: [17] [140/400] eta: 0:02:06 lr: 0.000023 loss: 0.0634 (0.0640) grad: 0.0339 (0.0373) time: 0.4502 data: 0.0032 max mem: 22446 +train: [17] [160/400] eta: 0:01:55 lr: 0.000022 loss: 0.0565 (0.0633) grad: 0.0342 (0.0373) time: 0.4606 data: 0.0034 max mem: 22446 +train: [17] [180/400] eta: 0:01:45 lr: 0.000021 loss: 0.0565 (0.0626) grad: 0.0349 (0.0372) time: 0.4692 data: 0.0034 max mem: 22446 +train: [17] [200/400] eta: 0:01:35 lr: 0.000020 loss: 0.0527 (0.0622) grad: 0.0355 (0.0373) time: 0.4510 data: 0.0034 max mem: 22446 +train: [17] [220/400] eta: 0:01:25 lr: 0.000019 loss: 0.0539 (0.0617) grad: 0.0366 (0.0372) time: 0.4624 data: 0.0036 max mem: 22446 +train: [17] [240/400] eta: 0:01:15 lr: 0.000019 loss: 0.0539 (0.0617) grad: 0.0326 (0.0372) time: 0.4616 data: 0.0037 max mem: 22446 +train: [17] [260/400] eta: 0:01:06 lr: 0.000018 loss: 0.0589 (0.0620) grad: 0.0380 (0.0374) time: 0.4492 data: 0.0035 max mem: 22446 +train: [17] [280/400] eta: 0:00:56 lr: 0.000017 loss: 0.0612 (0.0621) grad: 0.0359 (0.0372) time: 0.4695 data: 0.0037 max mem: 22446 +train: [17] [300/400] eta: 0:00:48 lr: 0.000016 loss: 0.0592 (0.0622) grad: 0.0350 (0.0374) time: 0.6402 data: 0.1764 max mem: 22446 +train: [17] [320/400] eta: 0:00:38 lr: 0.000016 loss: 0.0589 (0.0622) grad: 0.0397 (0.0377) time: 0.4468 data: 0.0031 max mem: 22446 +train: [17] [340/400] eta: 0:00:28 lr: 0.000015 loss: 0.0546 (0.0622) grad: 0.0354 (0.0374) time: 0.4801 data: 0.0037 max mem: 22446 +train: [17] [360/400] eta: 0:00:19 lr: 0.000014 loss: 0.0546 (0.0619) grad: 0.0307 (0.0372) time: 0.4620 data: 0.0035 max mem: 22446 +train: [17] [380/400] eta: 0:00:09 lr: 0.000014 loss: 0.0535 (0.0620) grad: 0.0338 (0.0370) time: 0.4641 data: 0.0034 max mem: 22446 +train: [17] [399/400] eta: 0:00:00 lr: 0.000013 loss: 0.0535 (0.0619) grad: 0.0338 (0.0370) time: 0.4654 data: 0.0035 max mem: 22446 +train: [17] Total time: 0:03:11 (0.4791 s / it) +train: [17] Summary: lr: 0.000013 loss: 0.0535 (0.0619) grad: 0.0338 (0.0370) +eval (validation): [17] [ 0/63] eta: 0:03:23 time: 3.2295 data: 2.9945 max mem: 22446 +eval (validation): [17] [20/63] eta: 0:00:20 time: 0.3437 data: 0.0042 max mem: 22446 +eval (validation): [17] [40/63] eta: 0:00:09 time: 0.3501 data: 0.0030 max mem: 22446 +eval (validation): [17] [60/63] eta: 0:00:01 time: 0.3269 data: 0.0030 max mem: 22446 +eval (validation): [17] [62/63] eta: 0:00:00 time: 0.3285 data: 0.0030 max mem: 22446 +eval (validation): [17] Total time: 0:00:24 (0.3907 s / it) +cv: [17] best hparam: (6, 1.0) (035) ('035_lr6.0e+00_wd1.0e+00') loss: 0.047 acc: 0.992 f1: 0.991 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [18] [ 0/400] eta: 0:22:29 lr: nan time: 3.3742 data: 3.0074 max mem: 22446 +train: [18] [ 20/400] eta: 0:03:50 lr: 0.000012 loss: 0.0564 (0.0625) grad: 0.0346 (0.0366) time: 0.4687 data: 0.0029 max mem: 22446 +train: [18] [ 40/400] eta: 0:03:10 lr: 0.000012 loss: 0.0564 (0.0637) grad: 0.0357 (0.0367) time: 0.4449 data: 0.0034 max mem: 22446 +train: [18] [ 60/400] eta: 0:02:52 lr: 0.000011 loss: 0.0552 (0.0629) grad: 0.0373 (0.0367) time: 0.4666 data: 0.0036 max mem: 22446 +train: [18] [ 80/400] eta: 0:02:39 lr: 0.000011 loss: 0.0646 (0.0635) grad: 0.0373 (0.0370) time: 0.4649 data: 0.0036 max mem: 22446 +train: [18] [100/400] eta: 0:02:27 lr: 0.000010 loss: 0.0584 (0.0632) grad: 0.0366 (0.0372) time: 0.4685 data: 0.0035 max mem: 22446 +train: [18] [120/400] eta: 0:02:16 lr: 0.000009 loss: 0.0518 (0.0612) grad: 0.0360 (0.0366) time: 0.4593 data: 0.0036 max mem: 22446 +train: [18] [140/400] eta: 0:02:05 lr: 0.000009 loss: 0.0615 (0.0618) grad: 0.0353 (0.0369) time: 0.4540 data: 0.0034 max mem: 22446 +train: [18] [160/400] eta: 0:01:55 lr: 0.000008 loss: 0.0652 (0.0613) grad: 0.0350 (0.0369) time: 0.4704 data: 0.0034 max mem: 22446 +train: [18] [180/400] eta: 0:01:45 lr: 0.000008 loss: 0.0543 (0.0608) grad: 0.0376 (0.0372) time: 0.4642 data: 0.0036 max mem: 22446 +train: [18] [200/400] eta: 0:01:35 lr: 0.000007 loss: 0.0527 (0.0605) grad: 0.0365 (0.0371) time: 0.4608 data: 0.0037 max mem: 22446 +train: [18] [220/400] eta: 0:01:25 lr: 0.000007 loss: 0.0549 (0.0603) grad: 0.0342 (0.0370) time: 0.4668 data: 0.0035 max mem: 22446 +train: [18] [240/400] eta: 0:01:16 lr: 0.000006 loss: 0.0549 (0.0600) grad: 0.0348 (0.0369) time: 0.4687 data: 0.0035 max mem: 22446 +train: [18] [260/400] eta: 0:01:06 lr: 0.000006 loss: 0.0537 (0.0603) grad: 0.0324 (0.0367) time: 0.4642 data: 0.0034 max mem: 22446 +train: [18] [280/400] eta: 0:00:56 lr: 0.000006 loss: 0.0581 (0.0603) grad: 0.0360 (0.0368) time: 0.4667 data: 0.0034 max mem: 22446 +train: [18] [300/400] eta: 0:00:48 lr: 0.000005 loss: 0.0585 (0.0601) grad: 0.0365 (0.0370) time: 0.6499 data: 0.1826 max mem: 22446 +train: [18] [320/400] eta: 0:00:38 lr: 0.000005 loss: 0.0550 (0.0601) grad: 0.0351 (0.0370) time: 0.4543 data: 0.0028 max mem: 22446 +train: [18] [340/400] eta: 0:00:28 lr: 0.000004 loss: 0.0593 (0.0603) grad: 0.0367 (0.0370) time: 0.4686 data: 0.0034 max mem: 22446 +train: [18] [360/400] eta: 0:00:19 lr: 0.000004 loss: 0.0574 (0.0600) grad: 0.0367 (0.0372) time: 0.4686 data: 0.0034 max mem: 22446 +train: [18] [380/400] eta: 0:00:09 lr: 0.000004 loss: 0.0518 (0.0596) grad: 0.0331 (0.0369) time: 0.4623 data: 0.0034 max mem: 22446 +train: [18] [399/400] eta: 0:00:00 lr: 0.000003 loss: 0.0509 (0.0594) grad: 0.0332 (0.0369) time: 0.4619 data: 0.0032 max mem: 22446 +train: [18] Total time: 0:03:12 (0.4803 s / it) +train: [18] Summary: lr: 0.000003 loss: 0.0509 (0.0594) grad: 0.0332 (0.0369) +eval (validation): [18] [ 0/63] eta: 0:03:25 time: 3.2626 data: 3.0213 max mem: 22446 +eval (validation): [18] [20/63] eta: 0:00:21 time: 0.3526 data: 0.0043 max mem: 22446 +eval (validation): [18] [40/63] eta: 0:00:09 time: 0.3580 data: 0.0030 max mem: 22446 +eval (validation): [18] [60/63] eta: 0:00:01 time: 0.3302 data: 0.0033 max mem: 22446 +eval (validation): [18] [62/63] eta: 0:00:00 time: 0.3305 data: 0.0032 max mem: 22446 +eval (validation): [18] Total time: 0:00:25 (0.3973 s / it) +cv: [18] best hparam: (6, 1.0) (035) ('035_lr6.0e+00_wd1.0e+00') loss: 0.047 acc: 0.992 f1: 0.991 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [19] [ 0/400] eta: 0:22:53 lr: nan time: 3.4346 data: 3.0444 max mem: 22446 +train: [19] [ 20/400] eta: 0:03:52 lr: 0.000003 loss: 0.0608 (0.0601) grad: 0.0327 (0.0345) time: 0.4710 data: 0.0029 max mem: 22446 +train: [19] [ 40/400] eta: 0:03:12 lr: 0.000003 loss: 0.0580 (0.0574) grad: 0.0339 (0.0357) time: 0.4562 data: 0.0034 max mem: 22446 +train: [19] [ 60/400] eta: 0:02:53 lr: 0.000002 loss: 0.0566 (0.0578) grad: 0.0348 (0.0351) time: 0.4569 data: 0.0036 max mem: 22446 +train: [19] [ 80/400] eta: 0:02:39 lr: 0.000002 loss: 0.0570 (0.0591) grad: 0.0360 (0.0365) time: 0.4582 data: 0.0035 max mem: 22446 +train: [19] [100/400] eta: 0:02:28 lr: 0.000002 loss: 0.0615 (0.0605) grad: 0.0387 (0.0365) time: 0.4828 data: 0.0033 max mem: 22446 +train: [19] [120/400] eta: 0:02:17 lr: 0.000002 loss: 0.0615 (0.0604) grad: 0.0350 (0.0365) time: 0.4668 data: 0.0034 max mem: 22446 +train: [19] [140/400] eta: 0:02:06 lr: 0.000001 loss: 0.0555 (0.0600) grad: 0.0339 (0.0366) time: 0.4561 data: 0.0033 max mem: 22446 +train: [19] [160/400] eta: 0:01:56 lr: 0.000001 loss: 0.0543 (0.0605) grad: 0.0338 (0.0364) time: 0.4721 data: 0.0033 max mem: 22446 +train: [19] [180/400] eta: 0:01:45 lr: 0.000001 loss: 0.0563 (0.0603) grad: 0.0326 (0.0363) time: 0.4682 data: 0.0035 max mem: 22446 +train: [19] [200/400] eta: 0:01:35 lr: 0.000001 loss: 0.0563 (0.0603) grad: 0.0357 (0.0364) time: 0.4616 data: 0.0034 max mem: 22446 +train: [19] [220/400] eta: 0:01:25 lr: 0.000001 loss: 0.0592 (0.0608) grad: 0.0357 (0.0365) time: 0.4523 data: 0.0034 max mem: 22446 +train: [19] [240/400] eta: 0:01:16 lr: 0.000001 loss: 0.0592 (0.0604) grad: 0.0340 (0.0363) time: 0.4565 data: 0.0031 max mem: 22446 +train: [19] [260/400] eta: 0:01:06 lr: 0.000000 loss: 0.0534 (0.0602) grad: 0.0340 (0.0363) time: 0.4597 data: 0.0033 max mem: 22446 +train: [19] [280/400] eta: 0:00:56 lr: 0.000000 loss: 0.0546 (0.0604) grad: 0.0350 (0.0364) time: 0.4705 data: 0.0034 max mem: 22446 +train: [19] [300/400] eta: 0:00:48 lr: 0.000000 loss: 0.0575 (0.0603) grad: 0.0361 (0.0365) time: 0.6232 data: 0.1680 max mem: 22446 +train: [19] [320/400] eta: 0:00:38 lr: 0.000000 loss: 0.0564 (0.0601) grad: 0.0387 (0.0366) time: 0.4652 data: 0.0026 max mem: 22446 +train: [19] [340/400] eta: 0:00:28 lr: 0.000000 loss: 0.0560 (0.0599) grad: 0.0387 (0.0366) time: 0.4617 data: 0.0033 max mem: 22446 +train: [19] [360/400] eta: 0:00:19 lr: 0.000000 loss: 0.0560 (0.0599) grad: 0.0354 (0.0367) time: 0.4551 data: 0.0033 max mem: 22446 +train: [19] [380/400] eta: 0:00:09 lr: 0.000000 loss: 0.0521 (0.0598) grad: 0.0354 (0.0367) time: 0.4523 data: 0.0033 max mem: 22446 +train: [19] [399/400] eta: 0:00:00 lr: 0.000000 loss: 0.0521 (0.0597) grad: 0.0336 (0.0367) time: 0.4465 data: 0.0033 max mem: 22446 +train: [19] Total time: 0:03:10 (0.4775 s / it) +train: [19] Summary: lr: 0.000000 loss: 0.0521 (0.0597) grad: 0.0336 (0.0367) +eval (validation): [19] [ 0/63] eta: 0:03:17 time: 3.1362 data: 2.8607 max mem: 22446 +eval (validation): [19] [20/63] eta: 0:00:21 time: 0.3566 data: 0.0040 max mem: 22446 +eval (validation): [19] [40/63] eta: 0:00:09 time: 0.3544 data: 0.0035 max mem: 22446 +eval (validation): [19] [60/63] eta: 0:00:01 time: 0.3528 data: 0.0033 max mem: 22446 +eval (validation): [19] [62/63] eta: 0:00:00 time: 0.3502 data: 0.0032 max mem: 22446 +eval (validation): [19] Total time: 0:00:25 (0.4029 s / it) +cv: [19] best hparam: (6, 1.0) (035) ('035_lr6.0e+00_wd1.0e+00') loss: 0.047 acc: 0.992 f1: 0.991 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +evaluating last checkpoint: experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +eval model info: +{"score": 0.9923115079365079, "hparam": [6, 1.0], "hparam_id": 35, "epoch": 19, "is_best": false, "best_score": 0.9923115079365079} +eval (train): [20] [ 0/297] eta: 0:14:25 time: 2.9153 data: 2.6757 max mem: 22446 +eval (train): [20] [ 20/297] eta: 0:02:14 time: 0.3625 data: 0.0250 max mem: 22446 +eval (train): [20] [ 40/297] eta: 0:01:47 time: 0.3486 data: 0.0030 max mem: 22446 +eval (train): [20] [ 60/297] eta: 0:01:34 time: 0.3570 data: 0.0033 max mem: 22446 +eval (train): [20] [ 80/297] eta: 0:01:24 time: 0.3548 data: 0.0036 max mem: 22446 +eval (train): [20] [100/297] eta: 0:01:15 time: 0.3605 data: 0.0035 max mem: 22446 +eval (train): [20] [120/297] eta: 0:01:08 time: 0.3981 data: 0.0037 max mem: 22446 +eval (train): [20] [140/297] eta: 0:01:00 time: 0.3721 data: 0.0033 max mem: 22446 +eval (train): [20] [160/297] eta: 0:00:52 time: 0.3575 data: 0.0035 max mem: 22446 +eval (train): [20] [180/297] eta: 0:00:43 time: 0.3454 data: 0.0034 max mem: 22446 +eval (train): [20] [200/297] eta: 0:00:36 time: 0.3805 data: 0.0035 max mem: 22446 +eval (train): [20] [220/297] eta: 0:00:28 time: 0.3730 data: 0.0036 max mem: 22446 +eval (train): [20] [240/297] eta: 0:00:21 time: 0.3504 data: 0.0035 max mem: 22446 +eval (train): [20] [260/297] eta: 0:00:13 time: 0.3518 data: 0.0035 max mem: 22446 +eval (train): [20] [280/297] eta: 0:00:06 time: 0.3391 data: 0.0037 max mem: 22446 +eval (train): [20] [296/297] eta: 0:00:00 time: 0.3277 data: 0.0031 max mem: 22446 +eval (train): [20] Total time: 0:01:49 (0.3689 s / it) +eval (validation): [20] [ 0/63] eta: 0:03:11 time: 3.0448 data: 2.7545 max mem: 22446 +eval (validation): [20] [20/63] eta: 0:00:21 time: 0.3821 data: 0.0029 max mem: 22446 +eval (validation): [20] [40/63] eta: 0:00:09 time: 0.3558 data: 0.0032 max mem: 22446 +eval (validation): [20] [60/63] eta: 0:00:01 time: 0.3418 data: 0.0034 max mem: 22446 +eval (validation): [20] [62/63] eta: 0:00:00 time: 0.3412 data: 0.0033 max mem: 22446 +eval (validation): [20] Total time: 0:00:25 (0.4060 s / it) +eval (test): [20] [ 0/79] eta: 0:03:59 time: 3.0316 data: 2.7962 max mem: 22446 +eval (test): [20] [20/79] eta: 0:00:28 time: 0.3500 data: 0.0029 max mem: 22446 +eval (test): [20] [40/79] eta: 0:00:16 time: 0.3869 data: 0.0031 max mem: 22446 +eval (test): [20] [60/79] eta: 0:00:07 time: 0.3748 data: 0.0035 max mem: 22446 +eval (test): [20] [78/79] eta: 0:00:00 time: 0.3256 data: 0.0032 max mem: 22446 +eval (test): [20] Total time: 0:00:31 (0.3970 s / it) +evaluating best checkpoint: experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +eval model info: +{"score": 0.9923115079365079, "hparam": [6, 1.0], "hparam_id": 35, "epoch": 14, "is_best": true, "best_score": 0.9923115079365079} +eval (train): [20] [ 0/297] eta: 0:15:20 time: 3.0979 data: 2.8064 max mem: 22446 +eval (train): [20] [ 20/297] eta: 0:02:15 time: 0.3605 data: 0.0040 max mem: 22446 +eval (train): [20] [ 40/297] eta: 0:01:47 time: 0.3457 data: 0.0029 max mem: 22446 +eval (train): [20] [ 60/297] eta: 0:01:33 time: 0.3418 data: 0.0035 max mem: 22446 +eval (train): [20] [ 80/297] eta: 0:01:22 time: 0.3312 data: 0.0033 max mem: 22446 +eval (train): [20] [100/297] eta: 0:01:12 time: 0.3344 data: 0.0031 max mem: 22446 +eval (train): [20] [120/297] eta: 0:01:05 time: 0.3583 data: 0.0034 max mem: 22446 +eval (train): [20] [140/297] eta: 0:00:57 time: 0.3745 data: 0.0035 max mem: 22446 +eval (train): [20] [160/297] eta: 0:00:50 time: 0.3571 data: 0.0037 max mem: 22446 +eval (train): [20] [180/297] eta: 0:00:42 time: 0.3463 data: 0.0032 max mem: 22446 +eval (train): [20] [200/297] eta: 0:00:35 time: 0.3419 data: 0.0033 max mem: 22446 +eval (train): [20] [220/297] eta: 0:00:27 time: 0.3444 data: 0.0035 max mem: 22446 +eval (train): [20] [240/297] eta: 0:00:20 time: 0.3499 data: 0.0034 max mem: 22446 +eval (train): [20] [260/297] eta: 0:00:13 time: 0.3606 data: 0.0036 max mem: 22446 +eval (train): [20] [280/297] eta: 0:00:06 time: 0.3607 data: 0.0037 max mem: 22446 +eval (train): [20] [296/297] eta: 0:00:00 time: 0.3330 data: 0.0031 max mem: 22446 +eval (train): [20] Total time: 0:01:46 (0.3600 s / it) +eval (validation): [20] [ 0/63] eta: 0:02:59 time: 2.8431 data: 2.5512 max mem: 22446 +eval (validation): [20] [20/63] eta: 0:00:21 time: 0.3742 data: 0.0040 max mem: 22446 +eval (validation): [20] [40/63] eta: 0:00:10 time: 0.3782 data: 0.0037 max mem: 22446 +eval (validation): [20] [60/63] eta: 0:00:01 time: 0.3360 data: 0.0032 max mem: 22446 +eval (validation): [20] [62/63] eta: 0:00:00 time: 0.3349 data: 0.0032 max mem: 22446 +eval (validation): [20] Total time: 0:00:25 (0.4048 s / it) +eval (test): [20] [ 0/79] eta: 0:04:11 time: 3.1861 data: 2.9340 max mem: 22446 +eval (test): [20] [20/79] eta: 0:00:29 time: 0.3658 data: 0.0193 max mem: 22446 +eval (test): [20] [40/79] eta: 0:00:17 time: 0.3833 data: 0.0034 max mem: 22446 +eval (test): [20] [60/79] eta: 0:00:07 time: 0.3577 data: 0.0034 max mem: 22446 +eval (test): [20] [78/79] eta: 0:00:00 time: 0.3263 data: 0.0032 max mem: 22446 +eval (test): [20] Total time: 0:00:31 (0.3971 s / it) +eval results: + +| model | repr | clf | dataset | ckpt | epoch | lr | wd | hparam_id | hparam | split | loss | acc | acc_std | f1 | f1_std | +|:---------|:-------|:------|:-------------|:-------|--------:|-------:|-----:|------------:|:---------|:-----------|-----------:|--------:|----------:|--------:|----------:| +| flat_mae | patch | attn | hcpya_task21 | best | 14 | 0.0018 | 0.05 | 35 | [6, 1.0] | train | 4.0798e-05 | 1 | 0 | 1 | 0 | +| flat_mae | patch | attn | hcpya_task21 | best | 14 | 0.0018 | 0.05 | 35 | [6, 1.0] | validation | 0.047269 | 0.99231 | 0.0013946 | 0.99096 | 0.0018313 | +| flat_mae | patch | attn | hcpya_task21 | best | 14 | 0.0018 | 0.05 | 35 | [6, 1.0] | test | 0.064839 | 0.99008 | 0.0013837 | 0.98852 | 0.0017189 | + + +done! total time: 1:19:44 diff --git a/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/train_log.json b/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/train_log.json new file mode 100644 index 0000000000000000000000000000000000000000..26dd840174a276b67f1d2a0205123f4787b353fc --- /dev/null +++ b/data_scaling/n1600_1/eval_v2/hcpya_task21__patch__attn/train_log.json @@ -0,0 +1,20 @@ +{"epoch": 0, "train/lr": 2.987987987987988e-05, "train/loss": 2.08816081404686, "train/grad": 0.2132206715643406, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.055992431640625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.05298583984375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.04787109375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.042823486328125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.037786865234375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.03081298828125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.023004150390625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.014176025390625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.002696533203125, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.990426025390625, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.978250732421875, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.959954833984375, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.942105712890625, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.916258544921875, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.8913323974609373, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.86742919921875, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.8367462158203125, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.8001348876953127, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.758481140136719, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.7188870239257814, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.668607635498047, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.6150352478027346, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.5528903198242188, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.483629150390625, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.408740997314453, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.3158745193481445, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.2306110000610353, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.1522703742980958, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.045929594039917, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.921720976829529, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.8137673592567445, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.7195913517475128, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.5998357653617858, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.5013675616681577, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.3947651261836291, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.2989261683821678, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.2057684186659754, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.1252228621579707, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.0462196102179586, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.9572333956789225, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.8942463842406869, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.8435632949694991, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.7835284975077957, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.7365258821472526, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.6874336830247194, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.6413856362085789, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.6053456961549819, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.5661091299075633, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.5355802967026829, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.040217070411890744, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.040153693994507196, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.04004696836695075, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.039941848069429395, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.03983601823449135, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.03968859441578388, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.039522833470255134, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.03933740294538438, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.03909118736162782, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.038827126799151304, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.03856419966556132, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.038167097251862285, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.037775310119614006, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.03720536750741303, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03665486696176231, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.0361329981777817, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03548989513888955, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03477147586643696, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03403215389698744, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.033404430337250234, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03270351705141365, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03204376630485058, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.031362952375784514, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03067211067304015, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.029976417711004615, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.029158272510394455, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.02842895002104342, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.027770795510150492, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.026897076740860937, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.025895652617327868, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.025026005483232437, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.024234813493676483, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.023179238219745456, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.02229029282461852, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.021323839959222823, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.020413015766534953, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.019545469059376045, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.018758914448553696, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.017994670884218068, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.017049657030729578, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.016113014708971606, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.01551641840254888, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.014909654029761441, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.014597706961794757, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.014533887044526636, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.014636375906993635, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.014468179080286064, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.014255099456640892, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.01431277466006577, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.014578104019165, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.00541615486145, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.9903435707092285, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.975332736968994, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.960474967956543, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.9399917125701904, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.9170849323272705, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.8915581703186035, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.8585267066955566, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.823568105697632, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.7897539138793945, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.7395846843719482, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.691396474838257, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.62339448928833, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.559351921081543, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.499095916748047, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.423480749130249, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.3346962928771973, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.234360694885254, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.139230966567993, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.018545627593994, "validation/loss_021_lr6.1e-01_wd1.0e+00": 1.8900996446609497, "validation/loss_022_lr7.2e-01_wd1.0e+00": 1.7429348230361938, "validation/loss_023_lr8.5e-01_wd1.0e+00": 1.5822776556015015, "validation/loss_024_lr1.0e+00_wd1.0e+00": 1.4146095514297485, "validation/loss_025_lr1.2e+00_wd1.0e+00": 1.2197036743164062, "validation/loss_026_lr1.4e+00_wd1.0e+00": 1.054354190826416, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.9136870503425598, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.7380689978599548, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.551305890083313, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.40636640787124634, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.30105826258659363, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.20503586530685425, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.15328413248062134, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.11348062753677368, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.08210180699825287, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.06547945737838745, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.06160493195056915, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.061089660972356796, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.05623061954975128, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.04935739189386368, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.04656391963362694, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.045848548412323, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.04663293436169624, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.04662632197141647, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.05196501687169075, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.061166826635599136, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.06844919174909592, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.06561039388179779, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06944444444444445, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.07217261904761904, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.08333333333333333, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.10689484126984126, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.14732142857142858, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.21180555555555555, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.24603174603174602, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.2537202380952381, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.2554563492063492, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.25223214285714285, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.24454365079365079, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.23313492063492064, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.22470238095238096, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.2177579365079365, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.22321428571428573, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.23586309523809523, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2666170634920635, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.30654761904761907, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.39012896825396826, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.4600694444444444, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.5131448412698413, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.5416666666666666, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.5768849206349206, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.6168154761904762, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.6949404761904762, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.7921626984126984, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.8591269841269841, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.8918650793650794, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.8973214285714286, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9002976190476191, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9017857142857143, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9166666666666666, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9600694444444444, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9637896825396826, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.96875, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9771825396825397, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9789186507936508, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9796626984126984, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9794146825396826, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9806547619047619, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.984375, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9841269841269841, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.984375, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9831349206349206, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9841269841269841, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9821428571428571, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9811507936507936, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9759424603174603, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9779265873015873, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.014244019406699716, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.015949068728008466, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.022005603032777703, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.03165259945415268, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.04479762794862942, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.05805639141615794, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.0645729538298612, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.06521516065270727, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.06555721610564445, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.06729870942617595, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.06656025054400215, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.06282469841727933, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.06068889133142505, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.0577369031349539, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.06268585283433648, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.07160756433541174, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.09297826399703436, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.12922516007481905, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.19457499762840372, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.24577486463072, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.27424879781070227, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.29340201130260063, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.3200124687896283, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.3834896521610284, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.550213341370484, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.7169028011506303, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.825576774382555, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.8766864110832231, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.8791633776654048, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.8760134480932853, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.8773115969586991, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.8998067621731595, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.958036949388026, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9618114729529699, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.966403097917081, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9762912893494505, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9785535663666594, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9787429812137388, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9785675237623344, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9787514812602147, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9812026414694948, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9797360948743394, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9804410813353166, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9793713352230972, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.981984193601782, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9800450436226075, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9787181405887094, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9727817620178979, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9754438379839554, "id_best": 40, "lr_best": 0.0042, "wd_best": 0.05, "train/loss_best": 0.8942463842406869, "validation/loss_best": 0.04935739189386368, "validation/acc_best": 0.984375, "validation/f1_best": 0.9812026414694948} +{"epoch": 1, "train/lr": 8.993993993993998e-05, "train/loss": 1.142191657423973, "train/grad": 0.15130306400358676, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.937904052734375, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.918460693359375, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.8868707275390624, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.8561407470703126, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.8262139892578126, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.7857745361328123, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.74135498046875, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.693544921875, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.6329144287109374, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.570779113769531, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.511563415527344, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.4255331420898436, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.3440901184082032, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.228967742919922, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.1208325958251955, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.0187498474121095, "train/loss_016_lr2.7e-01_wd1.0e+00": 1.8910018920898437, "train/loss_017_lr3.2e-01_wd1.0e+00": 1.7436629486083985, "train/loss_018_lr3.8e-01_wd1.0e+00": 1.5839002418518067, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.4405516624450683, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.2716580867767333, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.1074574756622315, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.9384691762924194, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.7772948092222214, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.6333476468920708, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.4916237224638462, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.3889776010066271, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.31333091743290425, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.23394439332187175, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.16755077538080512, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.12803394529037176, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.10399585124105215, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.08342631204053759, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.07230806197039782, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.06408692200668156, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.0595399578846991, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.05670904259197414, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.05497581792064011, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.053963057566434144, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.05382029113359749, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.05468943651765585, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.05678881406784057, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.05681393069215119, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.05928041447885334, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.06563110874965787, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.06776947954669595, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.10721282770857216, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.12423288911581039, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.1616474012658, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.037375837285071614, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.036964887557551264, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.036296936431899664, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.035643780305981636, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.035009878305718306, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.03415828298777342, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.03324186975136399, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.03229014225304127, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.03116532389074564, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.03013039654120803, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02926048914901912, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.028176488764584066, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.027320650881156327, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02632322837598622, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.025540447616949676, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.024884672109037637, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.02411596111021936, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.02324560989625752, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.022271593818441033, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.021359930825419725, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.020238427305594088, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.019079382182098925, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.017760680578649043, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.016324925934895873, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.014908442273736, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.013402034237515181, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.012148784855380654, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.011154360130894929, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.010000990625703707, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.008912868249462918, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.008264373654965311, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.007879605086054653, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.007529790363041684, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.007394414995796978, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0071701141679659484, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.007004108898108825, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.006918455664417706, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.00688842126110103, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.006926414261106402, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.007070551762008108, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.007304227830609307, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.007558352879132144, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.007636924156686291, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.00802912975777872, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.008827460194006563, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.008836364365415648, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.013168187971459702, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.014791570442757801, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.01763937671261374, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.843287706375122, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.8116393089294434, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.760756015777588, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.711941957473755, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.665231704711914, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.6030163764953613, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.5360443592071533, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.4651358127593994, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.3764147758483887, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.285879373550415, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.199706792831421, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.0740363597869873, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.9547849893569946, "validation/loss_013_lr1.7e-01_wd1.0e+00": 1.7869216203689575, "validation/loss_014_lr2.0e-01_wd1.0e+00": 1.6313092708587646, "validation/loss_015_lr2.3e-01_wd1.0e+00": 1.4879260063171387, "validation/loss_016_lr2.7e-01_wd1.0e+00": 1.3155220746994019, "validation/loss_017_lr3.2e-01_wd1.0e+00": 1.1283761262893677, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.9395328164100647, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.7813597917556763, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.6056743860244751, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.4483969509601593, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.312717080116272, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.21869339048862457, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.15818500518798828, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.10849297791719437, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.07922926545143127, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.06616668403148651, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.056395549327135086, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.05002521350979805, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.04599082097411156, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.04317052662372589, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.04001947492361069, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.038275640457868576, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.0369083471596241, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.03617558255791664, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.03611917421221733, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.037294432520866394, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.03602810204029083, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.037017300724983215, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.0404324010014534, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.04410138353705406, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.03525783121585846, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.06813614070415497, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.07987610995769501, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.1081637516617775, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.12210163474082947, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.47868508100509644, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.41019976139068604, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.25892857142857145, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.2542162698412698, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.24379960317460317, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.23735119047619047, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.23387896825396826, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.2361111111111111, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.24553571428571427, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.26091269841269843, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.28596230158730157, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.36061507936507936, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.4350198412698413, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.5124007936507936, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.5443948412698413, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.5850694444444444, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.6252480158730159, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.6755952380952381, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.753968253968254, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.8385416666666666, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.8869047619047619, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9052579365079365, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9233630952380952, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9352678571428571, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9503968253968254, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9642857142857143, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9737103174603174, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9791666666666666, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9813988095238095, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9831349206349206, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9851190476190477, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9853670634920635, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.986359126984127, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9868551587301587, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9878472222222222, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9878472222222222, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9875992063492064, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.988343253968254, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9880952380952381, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9890873015873016, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9873511904761905, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9895833333333334, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.982390873015873, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.980406746031746, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9759424603174603, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9851190476190477, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9563492063492064, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9670138888888888, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.0680268978436452, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.06751247332481579, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.06600054981588785, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.06323385283632792, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.06359188866669734, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.0658265526539646, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.0712723603236369, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.08097683003653147, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.10568467319076487, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.17413393771167274, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.22989417064121273, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.2795271113461167, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.30223160465749893, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.33885376453106386, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.40296806330101387, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.5194413700342336, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.6675268767471737, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.8014499389371854, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.8663804977564437, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.8912185056026987, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9143338977080123, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9288160893017254, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9470934426669197, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9619722024048375, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9723814718440925, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9773544290643411, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.979260049444287, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9808444368280281, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9831599304053134, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9834624763630447, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9843843112307304, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9847366167813428, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9861138891145855, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9860571880172666, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9863034846754921, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9871810547227801, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9879052713793732, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9874112593794303, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9877184753841661, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9868258055802566, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9877304089899891, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9858621229049928, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9881059262882688, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.980752226815876, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9764145078420955, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9719687152360437, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9836406735791601, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9369335066542241, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9564135179071543, "id_best": 42, "lr_best": 0.005699999999999999, "wd_best": 0.05, "train/loss_best": 0.05681393069215119, "validation/loss_best": 0.03525783121585846, "validation/acc_best": 0.9895833333333334, "validation/f1_best": 0.9881059262882688} +{"epoch": 2, "train/lr": 0.00014999999999999988, "train/loss": 0.8414699611067772, "train/grad": 0.19825187154114246, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.7275103759765624, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.6833270263671873, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.61354248046875, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.54774169921875, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.485523376464844, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.4030276489257814, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.3142800903320313, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.219495849609375, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.099934387207031, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.97789794921875, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.862525405883789, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.697315101623535, "train/loss_012_lr1.4e-01_wd1.0e+00": 1.546554412841797, "train/loss_013_lr1.7e-01_wd1.0e+00": 1.3455991744995117, "train/loss_014_lr2.0e-01_wd1.0e+00": 1.1715526580810547, "train/loss_015_lr2.3e-01_wd1.0e+00": 1.0204499053955078, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.8485075998306274, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.6735151958465576, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.5136208122968674, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.39690974712371824, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.28918210610747336, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.2107881186157465, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.15344211846590042, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.11560596071183682, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.09144171767868102, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.07453924756497145, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.06648504881188273, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0623064873740077, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.05854553204961121, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.05556395348161459, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.05372729763388634, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.05243551048450172, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.05154821896925568, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.05145014457404613, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.05269395155832171, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.05357959659770131, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.05122165082022548, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.05153903137892485, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.05601754802279174, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.06585108283907175, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.06466234065592288, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.07932401237078011, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.09416559191420674, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.14090763492509722, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.19496782655827702, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.33535612187348307, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.5389279534947127, "train/loss_047_lr4.3e+01_wd1.0e+00": 1.1483500716648996, "train/loss_048_lr5.0e+01_wd1.0e+00": 1.7685723053570837, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.03343069402500987, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.032513423338532445, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.03112550134770572, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02993774987757206, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02893992243334651, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.027831015707924962, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.026877202047035097, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02607392679899931, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.025268423855304718, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.0245690209325403, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.023940647607669233, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02301646207459271, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.022110621761530638, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02079120843205601, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.019565133852884175, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.01844967634882778, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.01713192571885884, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.015665181949734687, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.014133246641140431, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.012841059328056872, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.011480497025186196, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.01025041651329957, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.00905504273949191, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.008086718051927164, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.007403305678162724, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.006872663995018229, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.006632390691665932, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0065410366817377505, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.006483611992443912, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.006435246338369325, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.006440865766489878, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.006462610223097727, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.006577542728628032, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.006681368152203504, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.006942909137287643, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.007069488988490775, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.006795058145653457, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.006971418890170753, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.007647341601259541, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.008941408551763744, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.008988678668829379, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.010448368038632907, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.011913377830933315, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.017677697864055517, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.02163974005874479, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.03345873565122019, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.04914060110059495, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.07091576179606113, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.1042393590458337, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.600977659225464, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.5442256927490234, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.4555201530456543, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.3725099563598633, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.294088125228882, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.189700126647949, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.0766546726226807, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.9557172060012817, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.8038533926010132, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.6504977941513062, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.5088400840759277, "validation/loss_011_lr1.2e-01_wd1.0e+00": 1.3131353855133057, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.143052577972412, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.9286244511604309, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.7520716786384583, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.6040364503860474, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.44515588879585266, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.307700514793396, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.21343523263931274, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.15964356064796448, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.11312761157751083, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.08133939653635025, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.06437509506940842, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.055429503321647644, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.04975588247179985, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.045210808515548706, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.042619962245225906, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.04129457101225853, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.03830855339765549, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.034886475652456284, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.033654145896434784, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.03310579061508179, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.03232094645500183, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.030890924856066704, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.030543627217411995, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.030394840985536575, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.03907925263047218, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.05000479891896248, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.045332398265600204, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.06292907148599625, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.0685543641448021, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.08120008558034897, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.1847420334815979, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.2851465046405792, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.41975706815719604, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.5824538469314575, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.6098488569259644, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.123831868171692, "validation/loss_048_lr5.0e+01_wd1.0e+00": 4.0610527992248535, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.22371031746031747, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.2333829365079365, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.25669642857142855, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.2829861111111111, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.3335813492063492, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.4236111111111111, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.4965277777777778, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.5359623015873016, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.5701884920634921, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.6140873015873016, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.6790674603174603, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.8015873015873016, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.8668154761904762, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.8940972222222222, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9057539682539683, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9151785714285714, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9293154761904762, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9459325396825397, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9642857142857143, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9754464285714286, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9794146825396826, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9833829365079365, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9841269841269841, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9848710317460317, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9856150793650794, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9871031746031746, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9885912698412699, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.986359126984127, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9816468253968254, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9878472222222222, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9856150793650794, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9853670634920635, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9784226190476191, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9610615079365079, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9603174603174603, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9660218253968254, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9717261904761905, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9503968253968254, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9516369047619048, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9538690476190477, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.05774629071479389, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.06543973494358321, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.08035645328273902, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.10259583829071527, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.15025222708681138, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.22757439572199226, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.26911108750244594, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.29672122006719026, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.32117126685534525, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.38989351165704234, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.5333581225041922, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.7510938153610371, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.8384710081181569, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.873700418938391, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.8909182892742569, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9032638389755088, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9214965607699387, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9428175965468499, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9623857240223403, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9749940306753266, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9785531516101975, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9817377528829183, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9827304385677182, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9832946997329854, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9840255003234646, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.985459307348805, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.98764394514166, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9879562300061567, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9876866731020192, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9881812586587557, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9881735133767388, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9883228099477338, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9873222564757831, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9882369195241991, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9880685058818547, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9883117030769115, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9856773157482696, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9818387522701021, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9859881166936534, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9826068111078141, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9843874231790857, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9773449784581393, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9622954644014318, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9532493017136164, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9639478293557138, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.96616581156339, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9410516618393772, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9475787409849032, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9313211904346639, "id_best": 34, "lr_best": 0.0015299999999999997, "wd_best": 0.05, "train/loss_best": 0.05269395155832171, "validation/loss_best": 0.030543627217411995, "validation/acc_best": 0.9898313492063492, "validation/f1_best": 0.9880685058818547} +{"epoch": 3, "train/lr": 0.0002100600600600602, "train/loss": 0.7053434684872627, "train/grad": 0.26001176092773676, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.4793408203125, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.4110256958007814, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.3034490966796874, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.201914825439453, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.104985809326172, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.975959014892578, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.8370516967773438, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.6911013793945313, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.5132051467895509, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.3411848831176758, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.1888857460021973, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.987643404006958, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.8191831398010254, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.6156455421447754, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.4633468210697174, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.35377599477767946, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.25632431760430335, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.18307268410921096, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.13258830592036247, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.10330997815355658, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.08165746577084064, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.06995514873415232, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.06316035556606948, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.05864378459751606, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.0552846475224942, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.05218102690763771, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.04986719427630305, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.04800482090562582, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.04565836058929563, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.04337865091860294, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.04176400632597506, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.040411825273185965, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.039147929800674316, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.03835621197707951, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.03780478340573609, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.03808560157194733, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.039639719128608704, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.04165235430933535, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.04454295122995973, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.051898945178836586, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.07492572538554669, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.10224513661116362, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.175759405111894, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.3424466568417847, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.5812306409701705, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.886278959242627, "train/loss_046_lr3.6e+01_wd1.0e+00": 1.1125036704726516, "train/loss_047_lr4.3e+01_wd1.0e+00": 1.6155352899897844, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.7268149748817088, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.028328737374395133, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.027439975114539265, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.026360843693837524, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.025609580464661122, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.025032380884513258, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.024363448703661562, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.023659078031778337, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.022873017163947226, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02180804019793868, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.020661199819296597, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.019569843360222876, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.018064496153965593, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.016762983151711524, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.015049274619668722, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.01351531129097566, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.012171797845512628, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.010691529063042254, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.00924307673703879, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.008012720734113828, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.007218250145670026, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.006580958843696863, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.00627739351708442, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.006169119839323684, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0061293405364267525, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.006089005824760534, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.006030603832914494, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0059964156127534805, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0059625081851845604, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0058961611444829035, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00580777269846294, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.005736882800119929, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.005697915416676551, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0056478078509098854, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.005569246794620995, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.005455000998917967, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.00561711972404737, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.005907818572741235, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.006196849023472169, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.006455687756388215, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.007806491940209525, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.010377228189536254, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.013311709860281552, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.021225523239700124, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.033904435453036966, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.046942658685306844, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0627292691547006, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.07525525971444709, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.1004527273314503, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.16040213376426807, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.3316938877105713, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.2497777938842773, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.1205031871795654, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.9982492923736572, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.8821929693222046, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.7292059659957886, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.5675750970840454, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.4026635885238647, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.2093194723129272, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.030336856842041, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.8776577115058899, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.680954098701477, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.5213583707809448, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.3478536307811737, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.24539193511009216, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.18536879122257233, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.13471239805221558, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.09396302700042725, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.07117193192243576, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.061190299689769745, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.05397891253232956, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.04912153631448746, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.04526139050722122, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.04198160395026207, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.039256807416677475, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.0370648130774498, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.03706217184662819, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.039373863488435745, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.04331651329994202, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.047166258096694946, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.0476076677441597, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.04847364127635956, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.05168316140770912, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.053875360637903214, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.05680001899600029, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.06199200078845024, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.059259042143821716, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.06362216919660568, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.0972810909152031, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.0615314356982708, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.11789976805448532, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.3729846775531769, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.45804476737976074, "validation/loss_043_lr2.2e+01_wd1.0e+00": 1.3358737230300903, "validation/loss_044_lr2.6e+01_wd1.0e+00": 1.109691858291626, "validation/loss_045_lr3.1e+01_wd1.0e+00": 1.57076096534729, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.7461196184158325, "validation/loss_047_lr4.3e+01_wd1.0e+00": 2.870361804962158, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.3095040321350098, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.32018849206349204, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.38913690476190477, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.4851190476190476, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.5277777777777778, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.5560515873015873, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.5947420634920635, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.6468253968253969, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.7361111111111112, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.826140873015873, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.8740079365079365, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.8903769841269841, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.90625, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9166666666666666, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9322916666666666, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9464285714285714, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9640376984126984, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9774305555555556, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9806547619047619, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9828869047619048, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9851190476190477, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.986359126984127, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9868551587301587, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9871031746031746, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9885912698412699, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9885912698412699, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9878472222222222, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9851190476190477, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9836309523809523, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9833829365079365, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9826388888888888, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9828869047619048, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.982390873015873, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9818948412698413, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9813988095238095, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9833829365079365, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9831349206349206, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9754464285714286, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9841269841269841, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9816468253968254, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9627976190476191, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9603174603174603, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9461805555555556, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9712301587301587, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9709821428571429, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9756944444444444, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9722222222222222, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9811507936507936, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.13910780829866487, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.19759345622266566, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.2622646506587668, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.2897780710643013, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.3081937051651479, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.3555973316840792, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.46747021991072724, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.6546948378257189, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.7914363334282305, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.8508887010891483, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.8726621894444418, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.8923951790426184, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.90598154396093, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9252135564117933, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9420968906330587, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.962620154084752, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9766902365920157, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9794958813025904, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9815942250189471, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9833993090283073, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9842486894213287, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9849687165881414, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9851898481599028, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9868015502663277, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9875308434889557, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9884259050613616, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.987671595317906, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9871350769929274, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9853544366227088, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.983860032273188, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9832487133468722, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9824218475657948, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9833471459559989, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9826537430338065, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9829960769221402, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9828786824861478, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9836643400196426, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9840153211557472, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9715729474169772, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9822978458225542, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9800303074294481, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9415250039612204, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9518458513401213, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9357253772645058, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9696685811202441, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9666550676445473, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9703518100285119, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9708443548603537, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.980235066104941, "id_best": 25, "lr_best": 0.00035999999999999997, "wd_best": 0.05, "train/loss_best": 0.05218102690763771, "validation/loss_best": 0.0370648130774498, "validation/acc_best": 0.9898313492063492, "validation/f1_best": 0.9884259050613616} +{"epoch": 4, "train/lr": 0.0002701201201201201, "train/loss": 0.7961138306558132, "train/grad": 0.4057922871410847, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.197282257080078, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.101223602294922, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.9499201965332031, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.8081602478027343, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.6755262756347655, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.5049479293823242, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.3305259704589845, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.1587461280822753, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.9635263633728027, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.7871068382263183, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.6398315262794495, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.4635237228870392, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.34092742323875425, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.22989487856626512, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.1676958554983139, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.12914226084947586, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.09837830893695354, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.07901660848408937, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.06858144661411643, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.06297424346208573, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.05843888556584716, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.054924238938838243, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.05185901536606252, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.049210326299071315, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.04697870891541243, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.04493061978369951, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.043529802048578856, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.04273781190626323, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0417180038895458, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.03993574199266732, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.03873362640850246, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.038049010233953594, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0369362926390022, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.035571753811091185, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.03596863533370197, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.03927720478735864, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.041453816294670105, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.0536821202468127, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.07112464122474194, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.1529435756150633, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.30592557837255296, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.38840070643462243, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.557874127663672, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.0039298279210924, "train/loss_044_lr2.6e+01_wd1.0e+00": 1.2688218528032302, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.7097262128256263, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.1687145823147147, "train/loss_047_lr4.3e+01_wd1.0e+00": 5.49851234802045, "train/loss_048_lr5.0e+01_wd1.0e+00": 5.332737333569676, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.025632355893030763, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.025073414808139204, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.024308858420699835, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.023606359651312234, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02290333225391805, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02190137851051986, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.020761369224637748, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.01954361586365849, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.018103637206368147, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.016761921057477595, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.015559345954097808, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.013800432630814612, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.01217984591377899, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.010303247733972967, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.008948709322139621, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.00789961587288417, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.006919227432226762, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.006292199819581583, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.006078081469750032, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.0060173378622857855, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0059996298694750295, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.005996496807783842, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.006004662135092076, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.006035209231777116, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0060954040638171135, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.006184182126889937, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.006236282806494273, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.006297233509540093, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.006318152228632243, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.006207105660578236, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.006115658266353421, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0060677377341198735, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.005973582477745367, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.005959879941947292, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.006011193412923603, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.006411600756036933, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.007137607059266884, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.009296017828019103, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.011836711513024057, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.021630453188554385, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.028545171313399287, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.033470733030816734, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04765190426901051, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.07137505654507709, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.08309867547165826, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.1280637298255895, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.14981324062815632, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.19981727322104692, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.19346618279416702, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.0392184257507324, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.9283205270767212, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.754607081413269, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.5944768190383911, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.4481717348098755, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.2653089761734009, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.0852887630462646, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.9136931896209717, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.7228704690933228, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.5539655685424805, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.42040562629699707, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.28336668014526367, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.20577074587345123, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.14004401862621307, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.09883008152246475, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.07666831463575363, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.06331726908683777, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.0553499236702919, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.05005095526576042, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.04677814245223999, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.044025056064128876, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.04231913015246391, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.04100948944687843, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.04019596055150032, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.04084555059671402, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.0454380102455616, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.05031205713748932, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.053728148341178894, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.05531301721930504, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.05493542179465294, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.05268627405166626, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.0508577786386013, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.05184852331876755, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.05465615913271904, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.05685492977499962, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.06765004247426987, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.06134504824876785, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.11745427548885345, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.1692548245191574, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.4436253607273102, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.352417528629303, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.5685195922851562, "validation/loss_042_lr1.9e+01_wd1.0e+00": 1.043666958808899, "validation/loss_043_lr2.2e+01_wd1.0e+00": 1.5259720087051392, "validation/loss_044_lr2.6e+01_wd1.0e+00": 1.4712789058685303, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.790985107421875, "validation/loss_046_lr3.6e+01_wd1.0e+00": 4.098072052001953, "validation/loss_047_lr4.3e+01_wd1.0e+00": 8.268198013305664, "validation/loss_048_lr5.0e+01_wd1.0e+00": 6.621779441833496, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.5190972222222222, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.5493551587301587, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.5865575396825397, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.6391369047619048, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.7130456349206349, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8075396825396826, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8695436507936508, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8921130952380952, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9037698412698413, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9141865079365079, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9273313492063492, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.953125, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9697420634920635, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9779265873015873, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9809027777777778, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9833829365079365, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9846230158730159, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9853670634920635, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9868551587301587, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9878472222222222, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9885912698412699, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9888392857142857, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9890873015873016, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9885912698412699, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9846230158730159, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9836309523809523, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9826388888888888, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.982390873015873, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9818948412698413, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9838789682539683, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9838789682539683, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9841269841269841, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9833829365079365, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9851190476190477, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9828869047619048, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9853670634920635, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9794146825396826, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9776785714285714, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9754464285714286, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9789186507936508, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9719742063492064, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9682539682539683, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9766865079365079, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9811507936507936, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9769345238095238, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9717261904761905, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.966765873015873, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9637896825396826, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.28049179180419076, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.30034365315189343, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.3357897253742693, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.4448525580155901, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.6010305783352847, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.7575174944180803, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.8425847523339282, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8701099971471018, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.8860974181632576, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9012850726718398, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9196877613285294, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9513721638195652, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9689213380727737, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9771835531034637, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9798796056500333, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.982128276633317, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9833732435665009, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9841951399399138, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9854187551728175, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9862346567898699, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9863685714318441, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9871979978613472, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9874532263346177, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9877245621229507, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9876610829413833, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9840648750381917, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.983659527058623, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9826784768538559, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9824349949679487, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9820399924291746, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9837823521430832, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9843659579646129, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9842789128703818, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9838837100366545, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9849913258182683, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9813572028813098, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9811170890788711, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9744662620826972, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9748255804930398, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9718954816511872, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9779540011699099, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9723523964161668, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.949940714906332, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.975207038241869, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9791506150639565, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9742472797962536, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9664360713739196, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9606159909613423, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9604512742951921, "id_best": 23, "lr_best": 0.00025499999999999996, "wd_best": 0.05, "train/loss_best": 0.049210326299071315, "validation/loss_best": 0.04019596055150032, "validation/acc_best": 0.9890873015873016, "validation/f1_best": 0.9877245621229507} +{"epoch": 5, "train/lr": 0.00029891323817685334, "train/loss": 0.8369502240419388, "train/grad": 0.4671271876990795, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.9056591796875, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.781610641479492, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.5905562591552735, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.4190630722045898, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.266264991760254, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.080550079345703, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.9016802787780762, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.7332126569747924, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.5514904451370239, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.4045345771312714, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.30398105680942533, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.21134549468755723, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.15772401317954063, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.10995774529874325, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.08614481642842292, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.07497692549601197, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.06748661826364696, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.062181727224960924, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.058250557631254196, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.055481528108939526, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.05276217687875032, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.05029260351322591, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.04784215379506349, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.04534384443424642, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.04274378260597587, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.03989566513337195, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.03741428009234369, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.03526786928065121, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.03258858419023454, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.029790438609197736, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.02810178623534739, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.026475829938426614, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.024976993342861534, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0243333435151726, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.02957079962827265, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.03881686659529805, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.05201798003166914, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.10390316217206419, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.12096619484946132, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.3648388663586229, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.41247684133239093, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.6538990081008523, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.984206892726943, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.375382590163499, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.067032093470916, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.5517204910703004, "train/loss_046_lr3.6e+01_wd1.0e+00": 4.102884597480297, "train/loss_047_lr4.3e+01_wd1.0e+00": 6.777328786961735, "train/loss_048_lr5.0e+01_wd1.0e+00": 8.03553431452252, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.023972576791420577, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02335680183954537, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02230640315450728, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02122754741460085, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.020164514775387943, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.01877532558515668, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.017398558245040475, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.01608486959245056, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.014533793656155466, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.012912969319149852, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.011456980037037283, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.009822641650680452, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.008682800359092652, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.0073397058516275135, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.00657681024633348, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.006277027851901948, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.006165103862294927, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.006119226989685558, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.006113476888858713, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.006097809916245751, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.006074318324099295, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.006029314678162336, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.006013790440920275, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.006032041842117906, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.006030772681115195, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.005994809810072183, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.005879820796544664, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.005727449646801688, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.00549449139580247, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.005301910145790316, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.005202200429048389, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00512821183699998, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00513155966931663, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.004941892300412292, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.005722333006342523, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.007093040170366294, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.009916788367045229, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.015153600853082878, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.01677638313175066, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.03200341124966008, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0390004099366255, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.050614100217547725, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.07167163279212174, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.08010374987083693, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.11290692228443067, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.1299793225798929, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.1721742133174596, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.2202660083025694, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.23810970716178417, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.7557319402694702, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.6208161115646362, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.4172067642211914, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.2392781972885132, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.0848264694213867, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.9010968208312988, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.7259899377822876, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.5630080699920654, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.39617541432380676, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.279201865196228, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.209227055311203, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.14539659023284912, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.1044178158044815, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.07323043793439865, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.06109486520290375, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.054571960121393204, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.04924820363521576, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.04503563791513443, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.04170200601220131, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.039339520037174225, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.037181269377470016, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.03546065092086792, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.03413885086774826, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.03337671607732773, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.03284453973174095, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.03239341080188751, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.032718367874622345, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.0336739681661129, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.0364406481385231, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.03964786231517792, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.03952058404684067, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.04114145785570145, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.039147086441516876, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.042458005249500275, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.054750725626945496, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.057069070637226105, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.07157937437295914, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.11156633496284485, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.14915847778320312, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.27602770924568176, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.685175359249115, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.6799477934837341, "validation/loss_042_lr1.9e+01_wd1.0e+00": 1.2166457176208496, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.7316393852233887, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.4147565364837646, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.543577194213867, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.8540384769439697, "validation/loss_047_lr4.3e+01_wd1.0e+00": 5.824049949645996, "validation/loss_048_lr5.0e+01_wd1.0e+00": 5.611590385437012, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.5969742063492064, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.6376488095238095, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.7284226190476191, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.810515873015873, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.8630952380952381, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8881448412698413, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9020337301587301, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9129464285714286, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9278273809523809, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9471726190476191, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9615575396825397, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9769345238095238, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9809027777777778, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9831349206349206, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9841269841269841, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9851190476190477, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9858630952380952, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9878472222222222, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9878472222222222, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9878472222222222, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9885912698412699, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9895833333333334, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.988343253968254, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9885912698412699, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9875992063492064, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9866071428571429, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9875992063492064, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9871031746031746, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9880952380952381, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9875992063492064, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9848710317460317, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9841269841269841, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9833829365079365, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9841269841269841, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.982390873015873, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.980406746031746, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9739583333333334, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9766865079365079, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.980406746031746, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9610615079365079, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9650297619047619, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9724702380952381, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9766865079365079, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9747023809523809, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9747023809523809, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.3568948068287964, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.4340728951972666, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.6185880863890071, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.7589321255956845, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.8325921879335245, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.8667708032945834, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.8856206874047504, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9017173972124158, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9209067151011929, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9427206910436047, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9598068329962427, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9759046759975215, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9795968080418633, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9817006667895114, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9828353106001695, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9835711289400723, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9841126120215298, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9860122596921495, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9864042299039446, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9864042299039446, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9863869533169285, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9866690188535558, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9879638514448269, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9872563264130256, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9873226660567744, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9872541900011265, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9872551855360945, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9886923774574492, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9859457949783474, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9849992122777843, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9861660791393666, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9857157605046658, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9866413490947944, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9857282971607211, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9799096387954213, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9777321611231473, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9794517413572618, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9807190104138245, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9790312097081271, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9747271136997098, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.971081322415292, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.969476529444272, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9772860565408401, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9564417797783222, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.961281414009671, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9660374291457624, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.975074787439215, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9680032636652055, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.963499412868215, "id_best": 27, "lr_best": 0.00047999999999999996, "wd_best": 0.05, "train/loss_best": 0.03526786928065121, "validation/loss_best": 0.0336739681661129, "validation/acc_best": 0.9905753968253969, "validation/f1_best": 0.9886923774574492} +{"epoch": 6, "train/lr": 0.0002924170901991409, "train/loss": 0.5331837414205074, "train/grad": 0.3793169128894806, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.639715576171875, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.4969016265869142, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.285747299194336, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.1054911231994629, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.9511580848693848, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.7689765739440918, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.5980495119094849, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.4472420024871826, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.31020386099815367, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.22361174166202546, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.17150386020541192, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.12083111487329007, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.0919926305487752, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.07341227186843753, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.06532797610387206, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.06062645498663187, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.05631157441996038, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.05255350354127586, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.0492211386654526, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.046622733604162934, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.04369282132014632, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.040871111983433364, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.03783470181748271, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.03488251604139805, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.03205150467343629, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.029140996923670174, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.02710198750719428, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.02554408462718129, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.023879595743492247, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.02197169352322817, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.02088029520586133, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.01994559621438384, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.019736278038471937, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.019826400689780713, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.01925636099651456, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.029493473041802644, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.0429177409503609, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.07467442259192467, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.11686269663274289, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.1892461653240025, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.27098207088187337, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.25844568946398794, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.5540103358402848, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.8286256904900074, "train/loss_044_lr2.6e+01_wd1.0e+00": 1.2411422760412096, "train/loss_045_lr3.1e+01_wd1.0e+00": 1.6482585944887251, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.2955823687464, "train/loss_047_lr4.3e+01_wd1.0e+00": 4.140628235824406, "train/loss_048_lr5.0e+01_wd1.0e+00": 4.403017492787913, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.022542554875835775, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02167063906788826, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020229286095127464, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.01889142770320177, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.017718520718626677, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.016332870558835567, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.014960198709741235, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.013476593126542867, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.011601181647274644, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.010067810732871295, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.00900685683824122, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.007696938479784876, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.006768237305805087, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.006228567082434893, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.006080731413094327, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.006017525201314129, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.005968492687679827, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.005918779797502794, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.005876350943581201, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.0058468001749133695, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.005806871178210713, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.005719076380482874, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.005593356557656079, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.005463929364050273, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.005320255677215755, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.005163915144657949, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.005042549428253551, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.004918031291599618, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.004769684220227646, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.004602551372845483, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00449334464075946, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.004355202416463726, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.004529868906392949, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.004785178044448913, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.004324706322822749, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0064100672466702235, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.01036092708244439, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.012035959461111077, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.015316213294075141, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.02070732215502094, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.028774490884807203, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.028941225853137853, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04904416524186633, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.06789865870551448, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.09209233607811022, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.09542658540103995, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.12332893673121592, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.19394621057404698, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.1785868478939119, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.5134834051132202, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.365976095199585, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.152802586555481, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.9745334982872009, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.8235707879066467, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.6458640694618225, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.4823989272117615, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.34747257828712463, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.23782438039779663, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.1714199334383011, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.12846386432647705, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.08690030127763748, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.06849434226751328, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.05668710172176361, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.050536707043647766, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.04667690768837929, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.04316002503037453, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.04020337387919426, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.037687189877033234, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.035804182291030884, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.034016456454992294, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.03269597142934799, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.03151564300060272, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.03087393194437027, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.030421506613492966, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.030344057828187943, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.030446412041783333, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.030450141057372093, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.030497564002871513, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.030642805621027946, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.031813062727451324, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.034254495054483414, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.03728567436337471, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.037650082260370255, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.04323242977261543, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.05542249232530594, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.11968683451414108, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.11680082231760025, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.1522684544324875, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.22746863961219788, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.38289615511894226, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.5801265835762024, "validation/loss_042_lr1.9e+01_wd1.0e+00": 1.1283589601516724, "validation/loss_043_lr2.2e+01_wd1.0e+00": 1.020939588546753, "validation/loss_044_lr2.6e+01_wd1.0e+00": 2.3205697536468506, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.2486257553100586, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.2447378635406494, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.925452709197998, "validation/loss_048_lr5.0e+01_wd1.0e+00": 4.807068347930908, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.675843253968254, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.753968253968254, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.8435019841269841, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.8871527777777778, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9045138888888888, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9213789682539683, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9332837301587301, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9499007936507936, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9662698412698413, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9744543650793651, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9786706349206349, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9831349206349206, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9836309523809523, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9848710317460317, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.986359126984127, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9871031746031746, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9885912698412699, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9890873015873016, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9888392857142857, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9885912698412699, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9890873015873016, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9895833333333334, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9893353174603174, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9915674603174603, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9873511904761905, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9806547619047619, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9856150793650794, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9871031746031746, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9838789682539683, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9848710317460317, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9799107142857143, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9816468253968254, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9821428571428571, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9776785714285714, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9789186507936508, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9799107142857143, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9784226190476191, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9776785714285714, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.5225172004537675, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.6764687554754192, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.8192220646291253, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.8748922378257494, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.8957960061010585, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9162104478981321, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9303916240187637, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9482198084161434, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9653280182929906, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9738187310091633, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9777093987301991, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9817526737704506, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9823157449258938, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9834620631041998, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.984550364040864, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9855489376068471, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9862777687042483, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9870642934058053, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9874216072924105, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9871073406100289, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.986969967672705, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9876914305813287, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9882689652008875, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.988005433755164, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9882038158831344, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.988675883785451, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.988675883785451, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9892218600119568, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9898446608667151, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9898381193437691, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9895544783830792, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.988826382794375, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9888295812800937, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9910906763525811, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9887242993442721, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9853766609657232, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9804784995833946, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9850362854794796, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9867367511537756, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9813672478524135, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.984845161472917, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9790169782242378, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9787329804889143, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9793227638244808, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.976115774437982, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9756012177091222, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9784255903236869, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9769822658099759, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9760975865118046, "id_best": 33, "lr_best": 0.00129, "wd_best": 0.05, "train/loss_best": 0.019826400689780713, "validation/loss_best": 0.037650082260370255, "validation/acc_best": 0.9915674603174603, "validation/f1_best": 0.9910906763525811} +{"epoch": 7, "train/lr": 0.00027969249556793234, "train/loss": 0.34443987600505355, "train/grad": 0.2784055599570274, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.4141443252563477, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.2642914581298828, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.051015796661377, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.8748847579956055, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.7264045429229736, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.5546209383010864, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.4047345042228699, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.29156788408756257, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.2043385225534439, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.14978088945150375, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.11382594205439091, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.08431497830897569, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.07186422185972333, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.06291661062277853, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.05776437018066645, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.05417280248366296, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.05063031225465238, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.04732006500475108, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.044172675861045715, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.041533786114305256, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.038517327327281234, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.035623322380706667, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.032518793903291224, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.029481355575844647, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.026527931122109293, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.023386054150760175, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.02078763543628156, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.018528633890673517, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.01573956985026598, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.012907254016026855, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.011083043180406094, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.009705479945987463, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.009254518803209067, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.010142570240423083, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.010403314353898168, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.008940924564376474, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.04034777176566422, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.0521396235562861, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.07594350349158048, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.07282564412802457, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.1572237110324204, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.11875206008553504, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.2576485477760434, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.541742441272363, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.868263680730015, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.8277721913810819, "train/loss_046_lr3.6e+01_wd1.0e+00": 1.1959812013991178, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.171477021202445, "train/loss_048_lr5.0e+01_wd1.0e+00": 2.6195899615529923, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02137735011987388, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02031834837049246, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.018699906184338033, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.017324680779129267, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.01615356404799968, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.014692266690544784, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.013050447078421712, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.011352580564562231, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.009695391547866165, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.00842395106330514, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.007345524684060365, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.006388902325415984, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.006083931069588289, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.005927101502893492, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.005836312556639313, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.0057566593255614865, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.005665429384680465, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.005554149242816493, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.005439719111891463, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.005342916055815295, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.005213741706102155, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.005079231252311729, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0049322719959309326, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.004779124287015293, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.004619808285497129, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.004433676220069174, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.004251303874043515, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.004070925803971477, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0037950740784435766, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.003456039056691225, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.003138801889763272, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.002934995886407705, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.003006510078703286, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.003277020673485822, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.003965036918539226, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0032169046076660377, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.007976039258937355, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.010310952969648035, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.01323913982369803, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.01348627901235539, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.01962801005171236, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.017767791838482202, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.03023023878744243, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.04715356015701594, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.07427070454363252, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.06677472024713774, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.09278316732791524, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.126060110097025, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.1306563603092714, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.3142902851104736, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.163475513458252, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.9522095918655396, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.7787020802497864, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.6328815817832947, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.4666339159011841, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.3307107388973236, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.23627203702926636, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.16478842496871948, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.1165737584233284, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.08628463000059128, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.06584861129522324, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.05684356763958931, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.0497121661901474, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.04552394151687622, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.04271598905324936, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.04015162214636803, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.03773124888539314, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.03569464385509491, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.03423620015382767, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.03312334045767784, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.032545410096645355, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.03224441409111023, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.03238266706466675, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.03259911388158798, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.032751791179180145, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.032985638827085495, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.03313308209180832, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.03384438157081604, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.03779090940952301, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.038981784135103226, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.040581248700618744, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.04287338629364967, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.04290666803717613, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.042738936841487885, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.047017451375722885, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.14010435342788696, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.12656916677951813, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.13849300146102905, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.2409912794828415, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.38560909032821655, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.4203220009803772, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.7191767692565918, "validation/loss_043_lr2.2e+01_wd1.0e+00": 1.1828314065933228, "validation/loss_044_lr2.6e+01_wd1.0e+00": 1.4070662260055542, "validation/loss_045_lr3.1e+01_wd1.0e+00": 1.4969218969345093, "validation/loss_046_lr3.6e+01_wd1.0e+00": 2.6089186668395996, "validation/loss_047_lr4.3e+01_wd1.0e+00": 4.070654392242432, "validation/loss_048_lr5.0e+01_wd1.0e+00": 2.5784928798675537, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.7752976190476191, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.8392857142857143, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.8921130952380952, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.9094742063492064, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9223710317460317, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9347718253968254, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9528769841269841, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9660218253968254, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9747023809523809, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9789186507936508, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.982390873015873, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9838789682539683, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9846230158730159, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9868551587301587, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.988343253968254, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.988343253968254, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9888392857142857, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9885912698412699, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9885912698412699, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9888392857142857, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9888392857142857, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9888392857142857, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9885912698412699, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9885912698412699, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9880952380952381, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9880952380952381, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9880952380952381, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9878472222222222, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9885912698412699, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9885912698412699, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9885912698412699, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9851190476190477, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9846230158730159, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9858630952380952, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9868551587301587, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9848710317460317, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9861111111111112, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9856150793650794, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9811507936507936, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9846230158730159, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9861111111111112, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9818948412698413, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9774305555555556, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9816468253968254, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.7059034204148348, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.8080963778903187, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.8787830880373338, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.9013929637162376, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.9166174370159426, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9315433795257885, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.951679768278653, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9653021866567109, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9741876330797997, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9779625040225215, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9806607833446305, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9824631851899703, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9834128971208294, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9852392716425952, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9871277143818888, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9871277143818888, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9875104184586758, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9869782595779244, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9869782595779244, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9873301496921372, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9875124127104241, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9875672578560724, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9874598707293607, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9878253203110665, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9873273543217681, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9873273543217681, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9875104693756092, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.987112879754586, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9876540965897511, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.986643087522445, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9870695155446595, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9866944678512355, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9891193503327526, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9893352712651236, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9903931254033045, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9877707531308365, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9833880975307839, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9827391640467757, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.983738588615872, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9846025568870429, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9809333121077137, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9841121578444149, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.982980562517017, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9819524408142208, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9823640242596722, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.982504229000474, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9817553091299073, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9754020805504288, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9795530587806653, "id_best": 34, "lr_best": 0.0015299999999999997, "wd_best": 0.05, "train/loss_best": 0.010403314353898168, "validation/loss_best": 0.042738936841487885, "validation/acc_best": 0.9910714285714286, "validation/f1_best": 0.9903931254033045} +{"epoch": 8, "train/lr": 0.0002612959497061927, "train/loss": 0.23576025575399398, "train/grad": 0.21070458879694343, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.225849380493164, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.0754784965515136, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.8666131782531739, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.6963146209716797, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.5549305057525635, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.4012441539764404, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.28463516652584075, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.20709091067314148, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.14660969212651254, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.10549033492803574, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.08333718758076429, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.06861323075369001, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.06129007114097476, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.05500789794139564, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.050946063678711656, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.04783791842870414, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.044612181736156345, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.04137559968046844, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.03821554752998054, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.03548134492710233, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.03245966142974794, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.02966203983873129, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.026808225037530064, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.02398695893585682, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.02121869008988142, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.01818173173815012, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.015803568931296468, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.013824213137850165, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.011403269208967686, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.009108546851202846, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.007485897857695818, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.006153052924200892, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.005546611389145255, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.004444483099505305, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.0037632100377231834, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.007334676040336489, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.025117586217820644, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.028522353349253536, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.05727720537222922, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.03742213083431125, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.10641713060438633, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.06808904024772346, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.12655395745299758, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.24113295644521712, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.5534319944307208, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.30651326972059906, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.9907403008826077, "train/loss_047_lr4.3e+01_wd1.0e+00": 1.4198773789033294, "train/loss_048_lr5.0e+01_wd1.0e+00": 1.2629991577286273, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.019826907152310013, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.018656554482877254, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01697254961822182, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.015587775842286647, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.014353216416202486, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.012675247327424586, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.01090714211575687, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.00944250205066055, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.008123713083332405, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.006943753971718251, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.006223283805884421, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.005825459770276212, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.005683755064383149, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.005568981264368631, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.005490615687449463, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.005411852561519481, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.005323319672897924, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.005212531755678356, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.005087918655190152, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.004966220242204144, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.004825011578650447, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.00470353951852303, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.004566742695169523, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.004416990680183517, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.004217547895968891, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.003942800069708028, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0036994587189110462, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.003471899337164359, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.003141994215766317, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00274819136793667, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.002326384731568396, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.002071323677737382, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0018419968922125918, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0015222817446624503, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0013233109998384406, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0021793534548982052, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.005425088191518554, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.006497129928400778, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.009423103443858945, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.007651894275340165, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.013192782952040993, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.013063205905475267, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.018847307924166827, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.030300610221673296, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.05107359597263964, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.04063646125313358, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.07207335350938314, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.10025497578576388, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0844547315706889, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.1554553508758545, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.0060930252075195, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.7995724678039551, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.6311383247375488, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.4926902651786804, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.34768107533454895, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.2443341612815857, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.17671720683574677, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.1212267279624939, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.08504696190357208, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.06846491992473602, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.05687505379319191, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.05094008520245552, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.045680735260248184, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.04231591895222664, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.03994785621762276, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.037701208144426346, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.03561202064156532, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.033785153180360794, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.03241968899965286, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.03105403669178486, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.029919931665062904, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.029088973999023438, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.028538422659039497, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.0283203125, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.028579169884324074, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.029244353994727135, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.029651883989572525, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.030353857204318047, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.03195049986243248, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.032924022525548935, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.03309722989797592, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.03257905691862106, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.04149287939071655, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.03760627657175064, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.07288359850645065, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.08542413264513016, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.11153896152973175, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.1665583997964859, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.24814029037952423, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.2644514739513397, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.3870898187160492, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.7376930117607117, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.7337691187858582, "validation/loss_044_lr2.6e+01_wd1.0e+00": 1.5249428749084473, "validation/loss_045_lr3.1e+01_wd1.0e+00": 1.25360107421875, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1283812522888184, "validation/loss_047_lr4.3e+01_wd1.0e+00": 2.385660171508789, "validation/loss_048_lr5.0e+01_wd1.0e+00": 2.25496506690979, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.8449900793650794, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.8809523809523809, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.8993055555555556, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.9131944444444444, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9226190476190477, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9387400793650794, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9618055555555556, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9729662698412699, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9786706349206349, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9826388888888888, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9836309523809523, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9846230158730159, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9858630952380952, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9871031746031746, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9871031746031746, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.988343253968254, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9885912698412699, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9888392857142857, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9888392857142857, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9893353174603174, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9893353174603174, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9903273809523809, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9910714285714286, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9915674603174603, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9871031746031746, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9873511904761905, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.986359126984127, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9895833333333334, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9836309523809523, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9868551587301587, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.986359126984127, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9848710317460317, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9858630952380952, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9786706349206349, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9833829365079365, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9838789682539683, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.8150246069787922, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.8591986766218812, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.8824998119978007, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.9018032047093079, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.9152841669309806, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9348583153116129, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9601174958018216, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9721331565430318, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9775973859681579, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9813801086426909, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9823616680020542, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9830886043552647, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9843579065088717, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9855073795942746, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9852132657114051, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9855958383337045, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9865040699084263, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.986945285266611, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9871688256941362, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9872437775393638, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9879947041524023, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9879953436461122, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9889805746530788, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9895328371725278, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9883958171825531, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9887174778925989, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9888585076252816, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9893079091413662, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.989631100958281, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9892694770164457, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9899914994218479, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9893545823345704, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9895312938685575, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9891431080683233, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9910683591382533, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9839959669052929, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9849351728127644, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9875241913159805, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9847799387245669, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9839314816138037, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9887972205141503, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9837906285145086, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9851321042243895, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9854089662605174, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9837817012252433, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9834277587856218, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9761568105365309, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9817075429825943, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.981897580439852, "id_best": 34, "lr_best": 0.0015299999999999997, "wd_best": 0.05, "train/loss_best": 0.0037632100377231834, "validation/loss_best": 0.03760627657175064, "validation/acc_best": 0.9915674603174603, "validation/f1_best": 0.9910683591382533} +{"epoch": 9, "train/lr": 0.00023803200426111704, "train/loss": 0.1591764135658741, "train/grad": 0.1565465963073075, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.0947782707214355, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.9458128547668457, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.7405608749389648, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.574705400466919, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.4422182536125183, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.31087532758712766, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.22150947868824006, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.1629645386338234, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.11361695617437363, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.0853095580637455, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.07280598785728216, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.06361490855924785, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.0583676721714437, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.05326872456818819, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.04959462032653392, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.04667879916727543, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.043519608937203885, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.04017567628994584, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.03686078645288944, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.03396399254910648, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.030618601413443684, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.027526217065751552, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.024386532669886947, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.02141012957319617, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.018674968713894485, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.01563626829534769, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.01317399219609797, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.011254851669073106, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.008914338108152151, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0067072939779609445, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.005380120696499944, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.004490594957023859, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0020020941458642485, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.002140930462628603, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.0008502149023115635, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.007396889617666602, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.009292963165789843, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.009184281928464771, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.02071507280692458, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.015851363353431226, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.02532430037856102, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.04683533027768135, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.0754527690820396, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.09216116837225855, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.21099858723580836, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.23382082471624016, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.5618143845070154, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.5635683050379157, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.5428587756678462, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.01893936480395496, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.01777402503415942, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.016149766338057817, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.014753930899314583, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.013401909735985101, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.01155692427419126, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.009912097079213708, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.00866322927409783, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.007338341370923445, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.0064338979800231755, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.006099050282500684, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.005931369513273239, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.005856715303380043, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.005786137652467005, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.005725512715289369, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.005657888912828639, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.005573616378824226, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.005472742038837169, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.0053406558593269435, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.005211578793241642, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0050119825909496285, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0048066719601047225, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.004543406579468865, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.004286602077045245, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.00404556225068518, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.00371736203567707, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.003370705752458889, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0030444960341992557, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0025626265251048608, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0020560410969483202, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0017076442650795797, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.001430910229082656, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0008892428848912459, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0007963177451233605, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0003864524206380793, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0024684860329745105, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0026264662891540436, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.002821524845914587, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.005422164416385785, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.004013810058395027, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.006458208124653084, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.010127822355861021, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.011782410644597592, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.02008813478231714, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.030023087638310356, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.028117830560594418, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.055595386096721905, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.05759705751618215, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.059341698344594906, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.0306779146194458, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.8841968178749084, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.6823981404304504, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.5201389789581299, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.39328107237815857, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.2730256915092468, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.1934230476617813, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.13938213884830475, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.09340178221464157, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.0703083947300911, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.0600549578666687, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.05205205827951431, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.04749545454978943, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.04341993108391762, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.04046352207660675, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.03836832940578461, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.03624347224831581, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.03414732962846756, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.03251532465219498, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.031450431793928146, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.030517561361193657, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.02994014322757721, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.029581496492028236, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.029883448034524918, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.030949508771300316, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.0325964130461216, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.03387099876999855, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.03452802821993828, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.03379613533616066, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.03321381285786629, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.03326147794723511, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.03531836345791817, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.03648284077644348, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.03908207267522812, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.040510911494493484, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.05354680120944977, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.12912076711654663, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.08763662725687027, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.1529027372598648, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.2180529683828354, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.23145242035388947, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.33344003558158875, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.5900256633758545, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.6633672714233398, "validation/loss_044_lr2.6e+01_wd1.0e+00": 1.0220866203308105, "validation/loss_045_lr3.1e+01_wd1.0e+00": 1.2785450220108032, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.7387465238571167, "validation/loss_047_lr4.3e+01_wd1.0e+00": 2.0043423175811768, "validation/loss_048_lr5.0e+01_wd1.0e+00": 1.7408053874969482, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.8816964285714286, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.8993055555555556, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.9144345238095238, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.9303075396825397, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9427083333333334, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9595734126984127, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9724702380952381, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9769345238095238, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9816468253968254, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9838789682539683, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9838789682539683, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9858630952380952, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.986359126984127, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9871031746031746, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9880952380952381, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9885912698412699, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9880952380952381, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9885912698412699, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9893353174603174, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9895833333333334, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9890873015873016, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9893353174603174, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9898313492063492, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9885912698412699, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9880952380952381, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9915674603174603, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9811507936507936, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9885912698412699, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9878472222222222, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.986359126984127, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9900793650793651, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9871031746031746, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9878472222222222, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9866071428571429, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9875992063492064, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9861111111111112, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9841269841269841, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9813988095238095, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9831349206349206, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.8638234647015075, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.8847970562495879, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.906826006417907, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.925442443760757, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.9404561670880751, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9592932692547536, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9720135205511964, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9760642599882849, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.981032238420899, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9826514483718503, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.982969752540031, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9847413460542795, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9852213144914705, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9858534582757159, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9866122868838574, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9873257945613257, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9868363228626077, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9862469399203084, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9874315602287673, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9882518132130771, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9889196111963142, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9877474802328174, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9879240040066875, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9883258091342636, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9873782009185348, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9870170158866456, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.986563867514306, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9878201715152592, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9887761209544411, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.989853565803128, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9899107585125513, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9890406890012554, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9900789149788309, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9893171737499377, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9910673225996867, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9880517256272123, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9815189647106767, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9868233432972909, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9853162978651242, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9843989828152689, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9889023451364258, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9861566678385216, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9865794224880217, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9844746252526084, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9870441110881232, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9837476324205374, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9820450549287629, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9797728405604768, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.980848795341126, "id_best": 34, "lr_best": 0.0015299999999999997, "wd_best": 0.05, "train/loss_best": 0.0008502149023115635, "validation/loss_best": 0.040510911494493484, "validation/acc_best": 0.9915674603174603, "validation/f1_best": 0.9910673225996867} +{"epoch": 10, "train/lr": 0.00021091808096876188, "train/loss": 0.11849804438650607, "train/grad": 0.11881482787430286, "train/loss_000_lr2.0e-02_wd1.0e+00": 0.9853508377075195, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.8402346038818359, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.6408614444732667, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.48270306587219236, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.36327266454696655, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.2544674324989319, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.18318694025278093, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.13341625273227692, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.09338949512690306, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.07500864923000336, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.06652388351038098, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.0594452166557312, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.05502535630948842, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.050431002136319875, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.04701850687153637, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.044230229305103425, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.04110638730227947, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.037751161959022284, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.034342649122700096, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.031334941405802966, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.027864178251475094, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.02455462474375963, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.021214969297870992, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.017939131297171115, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.014837171649560332, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.01141323495656252, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.00875994436442852, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.006795724770054221, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0044994617532938715, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.002735863979905844, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.001930320030078292, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0009892395511269568, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0005744466558098793, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00033531205728650095, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00015621565282344818, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.001293560266494751, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.0027878683898597957, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.0005639263149350882, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.010552660236135126, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.026770520498976113, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.01394504520110786, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.014497969029471279, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.03434862835332751, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.04048190996050835, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.052199565330520274, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.15457398544065654, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.23345309486612678, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.31333283458836375, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.24390215313062072, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.017910105218179526, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.016789613687433303, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.015255396049469709, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.013867018581368028, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.012461593723855913, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.010730688706971706, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.009331358494237066, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.008114441505167633, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.006750218225643039, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.006133958187419921, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.005918263915809803, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.005774116491666064, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.0056843362282961605, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.005575222480110824, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.005475551555864513, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.005384157907974441, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.005260089837247506, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.005112530432234053, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.0049411983718164266, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.004775067877053516, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.004542972399794962, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.004298024023300968, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.004021685204133973, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.003725007244065637, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0034120536210684805, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0029891635747844704, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.002602610530993843, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0022526191098950223, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.001726727409131854, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.001209446966604446, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0009598157898199133, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0005261966052648859, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00034527446807032904, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.00022500456504076283, "train/grad_034_lr5.1e+00_wd1.0e+00": 9.335348634721185e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.00082092468807204, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.001145876691891024, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0003218141019833219, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0030320669334451124, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.005613167916967098, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0026651093295823676, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.005661865611007954, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.008071975240502967, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.010636041925723426, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.01423477069654986, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.026005999253846005, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.03229711949879863, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.046079264156102105, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.03652193566865976, "validation/loss_000_lr2.0e-02_wd1.0e+00": 0.9334867000579834, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.7897870540618896, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.5927413702011108, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.4383968412876129, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.325219064950943, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.22578677535057068, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.1604623794555664, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.11325334757566452, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.07787337154150009, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.06261945515871048, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.05516756698489189, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.048810675740242004, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.04501793161034584, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.04130055755376816, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.03880074992775917, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.036938004195690155, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.03517124801874161, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.0335511639714241, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.03224851191043854, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.03138919919729233, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.03066256456077099, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.030225256457924843, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.030164461582899094, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.030417900532484055, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.031118124723434448, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.032366808503866196, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.033126480877399445, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.03341870382428169, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.03418201953172684, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.034843314439058304, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.03446739539504051, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.03575536236166954, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.03699013590812683, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.03897996246814728, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.03949463739991188, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.05397931486368179, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.07076533138751984, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.08305129408836365, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.16385211050510406, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.2150176763534546, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.2278321236371994, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.3274884521961212, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.6260133385658264, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.6637623906135559, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.9907926321029663, "validation/loss_045_lr3.1e+01_wd1.0e+00": 1.0103013515472412, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.6119544506072998, "validation/loss_047_lr4.3e+01_wd1.0e+00": 2.6299726963043213, "validation/loss_048_lr5.0e+01_wd1.0e+00": 1.8815405368804932, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.8898809523809523, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.904265873015873, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.917906746031746, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.933531746031746, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9513888888888888, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9665178571428571, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9754464285714286, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9799107142857143, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9831349206349206, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9841269841269841, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9846230158730159, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9868551587301587, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.988343253968254, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.988343253968254, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9880952380952381, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9885912698412699, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9890873015873016, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9893353174603174, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9890873015873016, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9888392857142857, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9893353174603174, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9888392857142857, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9893353174603174, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9868551587301587, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9866071428571429, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9898313492063492, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9851190476190477, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.986359126984127, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.986359126984127, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9873511904761905, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9853670634920635, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9853670634920635, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9761904761904762, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9809027777777778, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.8729177489526788, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.8923540224357115, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.9107742778520854, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.9304151249474483, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.9494346122612045, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9655760698966851, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9744525072302478, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9790491522148441, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9815875256909715, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9828232527595505, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.98306056882817, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9852087813940591, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9856605691099871, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9869634586957122, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9862415005152785, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9858856778901485, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9867036568476053, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.98786461745009, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.988292197170179, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9881035119641294, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9879251127084677, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9884178351275721, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9872451531578802, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9879725795400848, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9884215205150083, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9880612896220572, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9885560180443836, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.988099869370482, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9880903206398347, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9888052587106965, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9889333273637861, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9895229229917687, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9893482961663034, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9889599833571692, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9899601904948434, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.989474024196178, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.987686760023419, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9876380130658082, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.983773989672501, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9835433872981023, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9881958069681405, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9853300341161548, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9855053316355329, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9842317295947185, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9850066403406339, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9821069791084983, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9853211783176309, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9737949274855334, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9790615018961145, "id_best": 31, "lr_best": 0.0009299999999999999, "wd_best": 0.05, "train/loss_best": 0.0009892395511269568, "validation/loss_best": 0.03575536236166954, "validation/acc_best": 0.9913194444444444, "validation/f1_best": 0.9895229229917687} +{"epoch": 11, "train/lr": 0.00018113997589164516, "train/loss": 0.09765339709818363, "train/grad": 0.08939482166431845, "train/loss_000_lr2.0e-02_wd1.0e+00": 0.895388822555542, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.753630313873291, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.5601904392242432, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.41188168168067935, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.30697876870632174, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.21629243284463884, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.1560564798116684, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.1119543906673789, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.0817542091012001, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.0689656294696033, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.06252799612469971, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.05665364554151893, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.05285697841085493, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.04872989466413855, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.04558934424072504, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.04293629539199174, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.039911150438711046, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.03657634693197906, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.03315657692961395, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.030100094862282278, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.026608763625845314, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.023268148405477404, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.0197546867094934, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.016268120454624294, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.012865807069465518, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.009223543563857675, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.006718003852292895, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.005031028296798468, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0034071721881628035, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0016923129558563232, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0010943817440420389, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0006398510653525591, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0003335186652839184, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00023614265955984592, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00011697953566908837, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.00013342329300940038, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.001588644776493311, "train/loss_037_lr8.3e+00_wd1.0e+00": 5.981692112982273e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.002967495573684573, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.01038186770863831, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.0015649814903736115, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.004034548746421934, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.014145763395354151, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.020834104781970383, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.02915671487338841, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.08136245199479163, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.0917161513492465, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.1420942497625947, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.24558635018765926, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.017147663780488074, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.015976183768361806, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01430044104810804, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.012685638945549726, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.011132364203222096, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.009452867999207228, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.008199712201021612, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.007080914143007249, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.00616584895295091, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.005857558452989906, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.005742108462145552, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.005659186267876066, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.005602612268994563, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.005530374065856449, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.005461147448513657, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.005396305597387254, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.0053099808123079125, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.0052006926439935345, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.005067501569283195, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.004930227455042768, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.00470751215092605, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.004453051122400211, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.004136879331490491, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.003767210622318089, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0033291745601309233, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.002764605096745072, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0022605567614300526, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0018403504887464805, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0013526574063143927, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0007311791888423613, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0005532688279436116, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00033812731918715145, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00014563168380618663, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.00011740180445599436, "train/grad_034_lr5.1e+00_wd1.0e+00": 6.291107906577054e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.00013888031183469706, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.00041794662374429416, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.00010477488083893118, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0017739611510691942, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0026790834659043343, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0013481713792876928, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0016374560840147249, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.004706003793532011, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.003994382201178775, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.00848657200155974, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.013850433299880165, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.01747504087416132, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.02770745199088863, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.03190274135940384, "validation/loss_000_lr2.0e-02_wd1.0e+00": 0.8587106466293335, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.7174623012542725, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.5247995853424072, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.3795132040977478, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.27940797805786133, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.1945459246635437, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.13742883503437042, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.0953579843044281, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.06920181959867477, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.05786847323179245, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.051866352558135986, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.04645492509007454, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.0431816391646862, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.03982458636164665, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.03755412623286247, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.03588420897722244, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.03419265151023865, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.03277476131916046, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.03151918202638626, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.03064657934010029, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.029868951067328453, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.02943788468837738, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.02943436987698078, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.029826687648892403, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.030562836676836014, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.0315607413649559, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.03236478939652443, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.032955318689346313, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.033858563750982285, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.03447720780968666, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.035244207829236984, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.03671741113066673, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.037182148545980453, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.038953591138124466, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.039290882647037506, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.04949622601270676, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.06508452445268631, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.08266688138246536, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.1287326216697693, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.18104878067970276, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.20876502990722656, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.27005505561828613, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.5625209212303162, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.5964924693107605, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.6503426432609558, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.8430115580558777, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.3192358016967773, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.6832733154296875, "validation/loss_048_lr5.0e+01_wd1.0e+00": 1.7450026273727417, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.8970734126984127, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.9067460317460317, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.9213789682539683, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.9382440476190477, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9561011904761905, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9677579365079365, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9774305555555556, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9813988095238095, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9841269841269841, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.984375, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9866071428571429, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9871031746031746, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9885912698412699, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9888392857142857, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9885912698412699, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9885912698412699, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9890873015873016, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9898313492063492, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9900793650793651, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9898313492063492, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9903273809523809, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9898313492063492, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9915674603174603, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9915674603174603, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9915674603174603, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9875992063492064, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9908234126984127, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9885912698412699, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.986359126984127, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9871031746031746, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9878472222222222, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9868551587301587, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9853670634920635, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9826388888888888, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.982390873015873, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.8823216811752465, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.8961064905567678, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.915011240600093, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.9346462187838485, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.954124692699611, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9669914279640863, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.976428128595921, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9804848032666984, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9828232527595505, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9830014144114462, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.985037988488053, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9860268122005232, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9851728955268355, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9860732040208313, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9871583173467661, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9873264505087855, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9872377235419739, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9873658138457921, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9879491125042209, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9884117781262205, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9889066203126489, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9886763194469284, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9889186935142217, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9882080142011848, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9883866934972287, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9883905365241402, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9880318959800531, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9889778224656046, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.988444218052746, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9890741718192283, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9890157838278216, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9899318086803613, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9902098770098475, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9896740186439114, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9901302981257482, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9903747419237829, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9879397883083959, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.988007112501438, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9886298087670291, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9854628951938247, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9897885897237015, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9877367592006829, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9848737673757575, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9836163242014567, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9872808305197428, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9851169157562596, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9839483667377406, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9824055992425478, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9814119863620508, "id_best": 31, "lr_best": 0.0009299999999999999, "wd_best": 0.05, "train/loss_best": 0.0006398510653525591, "validation/loss_best": 0.03671741113066673, "validation/acc_best": 0.9915674603174603, "validation/f1_best": 0.9899318086803613} +{"epoch": 12, "train/lr": 0.0001499999999999999, "train/loss": 0.08161722617223859, "train/grad": 0.061241233022883534, "train/loss_000_lr2.0e-02_wd1.0e+00": 0.8379211807250977, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.6976644420623779, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.5078449702262878, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.3676098477840424, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.2734763365983963, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.19405647605657578, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.13910901620984079, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.09943880453705788, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.0763812644034624, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.06625244535505771, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.060712359054014085, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.0553628091327846, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.05173968589864671, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.0475632337667048, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.04429958960041404, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.04152745896950364, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.038392428625375036, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.034992268569767476, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.03149074966087937, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.02841952346265316, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.024893181258812548, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.021443547494709492, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.017792985923588276, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.01421204298734665, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.01081623868085444, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.007491702539846301, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.005146326823160052, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0035186513885855676, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.002009251732379198, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0010257898829877377, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0006500952038913966, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0004553317930549383, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0002806470636278391, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00019843001849949359, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00011039013043045998, "train/loss_035_lr6.0e+00_wd1.0e+00": 5.1184985786676406e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.00014530004002153874, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.0381070896983147e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.0017707225400954486, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.0012469914462417365, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.003842262951657176, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.0005558240599930287, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.011681955708190798, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.004972235569730401, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.03148212304338813, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.013891721153631806, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.027043062383309006, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.02237571595236659, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.07587519589811563, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.016678004078567028, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.015575020471587778, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01391979488544166, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.012260604100301862, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.010756397740915418, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.009228157058823854, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.008001486296998338, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.006783355458173901, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.0060756793897598985, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.005870237172348425, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.005781798672978766, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.005693042405182496, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.005611124649294652, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.005496072139358148, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.005377397228148766, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.00525969453738071, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.005120926024683285, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.004958821204490959, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.004783377516141627, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.004622220112942159, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.004393266491242684, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.004137199391698232, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0038037480562343262, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0034159067946893627, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0029428171199106148, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0023447046149522068, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0018120309824735158, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.001364469833461044, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0008534523360867752, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00045586039159388746, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00027336908152392425, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00018640496493844695, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00011450410766883579, "train/grad_033_lr4.3e+00_wd1.0e+00": 8.740930012436365e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 5.6400202049644575e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 3.8627490446678795e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.00011812489027120946, "train/grad_037_lr8.3e+00_wd1.0e+00": 1.2774695203904078e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0010862591603167898, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0006546116731948417, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0012344702911267506, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0002834001774735099, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0028992071322994227, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.003086389552701875, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.004452353823984382, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.004308230399312462, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.007928169286681943, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.008605084730034225, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.013398627949565773, "validation/loss_000_lr2.0e-02_wd1.0e+00": 0.8015309572219849, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.6622820496559143, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.4744025766849518, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.33796393871307373, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.2483840137720108, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.17340031266212463, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.12094606459140778, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.0843096375465393, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.06396238505840302, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.054791513830423355, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.04960722476243973, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.04490359127521515, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.041858598589897156, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.038871049880981445, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.03682905063033104, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.03534269332885742, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.03400235250592232, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.032836612313985825, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.03182223066687584, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.031155385076999664, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.030463313683867455, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.030069809406995773, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.029820282012224197, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.030084621161222458, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.03053642064332962, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.031674452126026154, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.03272819146513939, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.03346414491534233, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.03433554992079735, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.035472091287374496, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.03619476407766342, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.03714626282453537, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.037292663007974625, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.038887180387973785, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.0393931120634079, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.04839891567826271, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.06503477692604065, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.07848530262708664, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.14074082672595978, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.15967686474323273, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.19760407507419586, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.2436380684375763, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.5181217789649963, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.596838116645813, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.6910141110420227, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.7588334083557129, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.1341125965118408, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.4410598278045654, "validation/loss_048_lr5.0e+01_wd1.0e+00": 1.0761795043945312, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.9030257936507936, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.9169146825396826, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.9357638888888888, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.9508928571428571, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9630456349206349, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9734623015873016, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9786706349206349, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9826388888888888, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9841269841269841, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9846230158730159, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9871031746031746, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9871031746031746, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9868551587301587, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9880952380952381, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9878472222222222, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9880952380952381, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9888392857142857, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9893353174603174, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9893353174603174, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9898313492063492, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9920634920634921, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.988343253968254, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9873511904761905, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9918154761904762, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9888392857142857, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9858630952380952, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9873511904761905, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9866071428571429, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9856150793650794, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9866071428571429, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9841269841269841, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9861111111111112, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.8919626646809953, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.9102773648407363, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.9331514324838004, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.9497875090062454, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.9620474732571868, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9724672637812329, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9776067786926289, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9813069524231911, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9828232527595505, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.983224923756976, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9855951226100462, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9856781080495969, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9851459666385965, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9855435114000406, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9861826101738946, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9852757808633076, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.985379092640792, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9861274551857309, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9859961598617233, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9866747032073148, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9871791865835465, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9875343182611883, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9879794849628144, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9882383428026076, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9886000339490502, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9890036940480358, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9884161634206475, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9880461041398734, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9883608152563303, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9884544317296053, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.988544938648474, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9890320029239315, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9894824358698108, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9896740186439114, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9903618538856904, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9907763854442877, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9868347801924041, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9878258730468827, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9870435843703728, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9857564125831031, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9903941039286679, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9880829078336758, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9835943579103233, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9848158308698678, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9852673882075573, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9838042041139963, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9853832142889599, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9826399040368898, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9849724267132027, "id_best": 35, "lr_best": 0.0018, "wd_best": 0.05, "train/loss_best": 5.1184985786676406e-05, "validation/loss_best": 0.04839891567826271, "validation/acc_best": 0.9920634920634921, "validation/f1_best": 0.9907763854442877} +{"epoch": 13, "train/lr": 0.00011886002410835478, "train/loss": 0.07118423396721482, "train/grad": 0.049717395529150964, "train/loss_000_lr2.0e-02_wd1.0e+00": 0.7786026763916015, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.6393633127212525, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.45395123958587646, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.3229998278617859, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.23892803370952606, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.16902593821287154, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.1193696191906929, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.08641779962927103, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.0688980790041387, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.06063314178958535, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.05584918238222599, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.0509657037910074, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.04747228866443038, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.04346971089951694, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.04027993706054986, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.037561352783814075, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.03440752581693232, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.03107054116204381, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.02762192312628031, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.024647807888686656, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.02127108539454639, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.018140034824609755, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.014846329800784588, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.011590787321329116, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.008505022609606385, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.005556372525170446, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0037295046541839837, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.002595988353714347, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0015470431931316852, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.000854246336966753, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0005370004009455443, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.00038953878916800025, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00024433073587715626, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0001737651787698269, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00010396985337138176, "train/loss_035_lr6.0e+00_wd1.0e+00": 4.6645263209939004e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.36767902970314e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.3012001290917397e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 9.932573884725571e-06, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.0003025267738848925, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.002541714496910572, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.00037150275893509387, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.0009268718399107457, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.0018238105066120626, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.0037264473829418422, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.005840821834281087, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.02028746104799211, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.007151867523789406, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.02334058933891356, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.01609885283280164, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.014978881874121726, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.013254248537123203, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.011553285794798284, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.010145315774716437, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.008787341420538724, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.0075490766228176655, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.006429632778745145, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.005897997235879302, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.0057432155072456225, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.005667463960126042, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.005587894844356924, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.005518880952149629, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.0054267753008753064, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.005319899872411042, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.005215533822192811, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.005068380955781322, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.004894689353241119, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.0046856135039706715, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.004478821789525682, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.004204686779848999, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.003912552521069301, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0035471982743183615, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0031033086198294767, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0025718399145262085, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0019316234019061086, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.001403335639515717, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0010053152329783188, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0006165084884560201, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0003504245870954037, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00021822116073053622, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0001551370955485254, "train/grad_032_lr3.7e+00_wd1.0e+00": 9.957245556392991e-05, "train/grad_033_lr4.3e+00_wd1.0e+00": 7.591353358293417e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 5.2094983525137193e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 3.09047394127937e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 2.3851058365360612e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 1.231507311316507e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 1.2282087806955456e-05, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0004819548146474828, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0004005091745880363, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.00021237168942131725, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0007086090543201597, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.001747941662249955, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0018094243239048103, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0020449528102543142, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.005129822952923312, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.006019796948478119, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0053071648163954665, "validation/loss_000_lr2.0e-02_wd1.0e+00": 0.7589104771614075, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.6214064359664917, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.4378378391265869, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.30913230776786804, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.22718673944473267, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.15843263268470764, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.10883297771215439, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.07734967768192291, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.06062506511807442, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.052576832473278046, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.048036299645900726, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.0436224490404129, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.040748629719018936, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.03785218670964241, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.03588810935616493, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.03430582210421562, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.03289136663079262, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.03158659115433693, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.030573885887861252, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.029740329831838608, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.029187100008130074, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.029041390866041183, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.029173290356993675, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.029737945646047592, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.03076821193099022, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.03203773498535156, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.033134572207927704, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.034017130732536316, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.034912820905447006, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.03604060783982277, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.036593958735466, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.03775588795542717, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.03794736787676811, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.039354536682367325, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.039568692445755005, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.04783833399415016, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.06361036747694016, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.07599825412034988, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.1290375292301178, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.14733438193798065, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.19070185720920563, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.21712690591812134, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.4298803508281708, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.5078914761543274, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.5206060409545898, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.7022820115089417, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.9710080623626709, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.1809521913528442, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.9128496646881104, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.9064980158730159, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.9176587301587301, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.9342757936507936, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.9526289682539683, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9655257936507936, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9756944444444444, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9801587301587301, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9828869047619048, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9841269841269841, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9858630952380952, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9866071428571429, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9868551587301587, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9871031746031746, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.988343253968254, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9885912698412699, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9878472222222222, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9885912698412699, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9890873015873016, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9898313492063492, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9900793650793651, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9895833333333334, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9898313492063492, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9895833333333334, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9920634920634921, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9880952380952381, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9915674603174603, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9895833333333334, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9875992063492064, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9878472222222222, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.988343253968254, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.986359126984127, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9885912698412699, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9833829365079365, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9866071428571429, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.8969872270180681, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.9111991593786494, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.9315690648962265, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.9513743056279689, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.9644059645419799, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9749011238102986, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9789463399702365, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9813743669370198, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9828412629425732, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9841549773655685, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.985058126850975, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9850103543290638, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.985555066077773, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9868036698752334, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9869840124370636, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9856441363024524, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9864818265072083, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9870697017244097, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.987515188510836, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9881434343304546, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9881986027111597, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9875659059679494, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9878706986097537, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.98769038129206, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.98863949244353, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9888200201160886, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9884571880074937, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9883358692717465, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9884544317296053, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9885436864070898, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.988714732159884, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9884908867033285, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9893514340717845, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9895396591656264, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9901392292442148, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9907763854442877, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9876868421433959, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9880498323647776, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9881079263048841, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.98649911505327, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9900244282612233, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9885493600204298, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.985630446581085, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9860685340344689, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9864447652015619, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9842630678120126, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9880257224261245, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9825467946765849, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9846475126743981, "id_best": 35, "lr_best": 0.0018, "wd_best": 0.05, "train/loss_best": 4.6645263209939004e-05, "validation/loss_best": 0.04783833399415016, "validation/acc_best": 0.9920634920634921, "validation/f1_best": 0.9907763854442877} +{"epoch": 14, "train/lr": 8.9081919031238e-05, "train/loss": 0.0680533193424344, "train/grad": 0.04180876610800624, "train/loss_000_lr2.0e-02_wd1.0e+00": 0.7477811527252197, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.6112289953231812, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.43119316816329956, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.30726631104946134, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.22912053525447845, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.16351891458034515, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.1159313128143549, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.0865588791668415, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.07107143735513091, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.06331359489820898, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.058553990041837096, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.05360504643060267, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.04999523133970797, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.045767688937485215, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.04233477896079421, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.039426379669457674, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.0359212298784405, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.03222950252704322, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.028387375446036457, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.025091507667675615, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.021418885374441744, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.017961751548573376, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.014382777875289321, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.010895703639835119, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.007724046697840095, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.004858512869104743, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.003130618529394269, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.002097082966938615, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.001250525275245309, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0007190538849681616, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.00047562045976519586, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0003456194885075092, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00022739458829164504, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0001638864353299141, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00010269012302160262, "train/loss_035_lr6.0e+00_wd1.0e+00": 4.9559371545910836e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.0965691655874254e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.2905988842248917e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.919519320130348e-06, "train/loss_039_lr1.2e+01_wd1.0e+00": 3.27460840344429e-05, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.534128725528717e-06, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.0001451476849615574, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.00015657907351851463, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.475159078836441e-06, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.002269778372719884, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.00023804115131497382, "train/loss_046_lr3.6e+01_wd1.0e+00": 5.409745499491691e-05, "train/loss_047_lr4.3e+01_wd1.0e+00": 1.3715047389268875e-05, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.00756106055341661, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.016142621547915043, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.015002479548566044, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.013217897517606617, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.011507236626930534, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.010143882997799664, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.008813810648862273, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.007490960644790903, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.006447715227259323, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.006018787264474667, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.0058778106607496735, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.005800793918315322, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.005713932485377882, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.0056246937034302395, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.005491763012541923, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.005358240159403067, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.005230501506885048, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.0050507326037040915, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.0048567045235540715, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.004628997062391136, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.004409460548195057, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0041334271218511276, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.003831269829533994, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.003467441082611913, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0029985930231487146, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.002425578800684889, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0017564166561351159, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0012531017771107145, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.000890636086332961, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0005489334790945577, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00031012519484647784, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00019751644384314205, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00014174382231999517, "train/grad_032_lr3.7e+00_wd1.0e+00": 9.484428994085193e-05, "train/grad_033_lr4.3e+00_wd1.0e+00": 7.221324530917172e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 4.9083989223248635e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 2.9482799571241004e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 2.1584369213135114e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 1.2198522371529505e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 5.413445719189378e-06, "train/grad_039_lr1.2e+01_wd1.0e+00": 4.116254762746262e-05, "train/grad_040_lr1.4e+01_wd1.0e+00": 9.053130376805407e-06, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.00017699592570407925, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0001204547010950244, "train/grad_043_lr2.2e+01_wd1.0e+00": 6.44352338749019e-05, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0007907263091085245, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0004942737305735734, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0008487680027270821, "train/grad_047_lr4.3e+01_wd1.0e+00": 4.1287920986793475e-05, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.002647985905942457, "validation/loss_000_lr2.0e-02_wd1.0e+00": 0.7285225987434387, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.5921844244003296, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.41236111521720886, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.2895567715167999, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.21280835568904877, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.1482289433479309, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.10104537755250931, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.07310556620359421, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.0585622675716877, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.05128296837210655, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.047072332352399826, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.042885612696409225, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.04016907513141632, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.037359144538640976, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.03540830686688423, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.03404305875301361, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.0327117033302784, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.031699880957603455, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.030960218980908394, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.030517490580677986, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.030396033078432083, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.03069019690155983, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.0313364714384079, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.032422881573438644, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.03366447240114212, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.034645162522792816, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.035361554473638535, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.03574957698583603, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.035986606031656265, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.036573827266693115, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.037053659558296204, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.03784852847456932, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.038084644824266434, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.03945988789200783, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.03930005803704262, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.04726891964673996, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.06295952200889587, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.07444633543491364, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.12638060748577118, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.14549264311790466, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.18415887653827667, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.21164624392986298, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.41297978162765503, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.4854663610458374, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.5195916891098022, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.6370760202407837, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.8628629446029663, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.056992769241333, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.7535841464996338, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.9097222222222222, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.9228670634920635, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.9392361111111112, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.9565972222222222, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9670138888888888, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9769345238095238, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9799107142857143, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9836309523809523, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.984375, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9868551587301587, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9871031746031746, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9868551587301587, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9871031746031746, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9888392857142857, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9885912698412699, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9888392857142857, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9890873015873016, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9893353174603174, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9885912698412699, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9893353174603174, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9880952380952381, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9918154761904762, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9895833333333334, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9871031746031746, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.988343253968254, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9888392857142857, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9868551587301587, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9871031746031746, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9853670634920635, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9871031746031746, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.9014361407724707, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.9171908391823028, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.9373899696777773, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.9554221323303079, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.9657653468405527, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9760642599882849, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9785984671173986, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9823873724855051, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9830200966853233, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9854127410690396, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9854632158719651, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9853323134060968, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9855112920724232, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9855435114000406, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9854548324578639, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9855770948983247, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9858072208522307, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9873445900025439, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9870337428870877, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9871359196335598, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.987311803108325, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9878779232555062, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9869731878534453, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9877394268332471, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9882736869146749, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9879135518317939, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.98813598575609, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9880440106648463, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9886879941634442, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9879603710398606, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9888519764375955, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9890320029239315, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9893116586092109, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9893156901718912, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9906713881249664, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9909580918726532, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9875077735707156, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9882271761411946, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9881079263048841, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9869569902782949, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9905658351735495, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9885502162324322, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9852349934267289, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9862905742751235, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9875339960609301, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9846228119350877, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9853714190672447, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9843922924477287, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9853254560064902, "id_best": 35, "lr_best": 0.0018, "wd_best": 0.05, "train/loss_best": 4.9559371545910836e-05, "validation/loss_best": 0.04726891964673996, "validation/acc_best": 0.9923115079365079, "validation/f1_best": 0.9909580918726532} +{"epoch": 15, "train/lr": 6.196799573888289e-05, "train/loss": 0.06291382821276784, "train/grad": 0.039026758866384625, "train/loss_000_lr2.0e-02_wd1.0e+00": 0.7167497491836547, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.5803973340988159, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.40243027091026307, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.28317036271095275, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.20945148229598998, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.147466851323843, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.10255959719419479, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.07705155314877629, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.06365390712395311, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.056825785487890246, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.05253427565097809, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.04803392929956317, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.0447196059115231, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.040815137038007376, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.03761774754151702, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.03491178728640079, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.03177342803217471, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.02844257769174874, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.025042950240895152, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.022042032955214383, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.018673825236037375, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.015459878295660019, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.012161731300875545, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.00903486511670053, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.006400859281420708, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.0040564834047108886, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0026169523969292643, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.00176516299135983, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0010619627684354783, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0006226308364421129, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.00041427179239690305, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.00030472506769001483, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00020326463505625724, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00014393627643585205, "train/loss_034_lr5.1e+00_wd1.0e+00": 9.510910138487815e-05, "train/loss_035_lr6.0e+00_wd1.0e+00": 4.162067547440529e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.139219082891941e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.2807166203856468e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 4.014410078525543e-06, "train/loss_039_lr1.2e+01_wd1.0e+00": 9.039333090186119e-06, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.03477218747139e-06, "train/loss_041_lr1.6e+01_wd1.0e+00": 6.299139931797981e-06, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.0940898209810257e-06, "train/loss_043_lr2.2e+01_wd1.0e+00": 6.044283509254456e-09, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.6263296604156493e-08, "train/loss_045_lr3.1e+01_wd1.0e+00": 1.6763806343078613e-10, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.003604319952428341, "train/loss_047_lr4.3e+01_wd1.0e+00": 7.450580596923828e-11, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.0003689576964825392, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.01576890015974641, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.014641486536711455, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01282209855504334, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.011090205886866898, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.009729784259106963, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.008387824945384637, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.007097827000543475, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.00626340044720564, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.005940450820489786, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.005825678341789171, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.005745991240255535, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.005659664325066842, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.005573269996384625, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.0054389538589748555, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.005309713455208111, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.005180101038713474, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.0050175565312383695, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.004828671582072275, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.004614961215702351, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.004399873594520614, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.004130910430394579, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0038314239748433467, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.003447935953445267, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.002954168076139467, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.002392303438427916, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0017269448050683423, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0012030382070770428, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0008356020919018192, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0005032103328630911, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0002889742349998414, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0001868721718483357, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0001338247231274181, "train/grad_032_lr3.7e+00_wd1.0e+00": 9.089721335726608e-05, "train/grad_033_lr4.3e+00_wd1.0e+00": 6.855944438257211e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 4.601734853110884e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 2.8158674750251577e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 2.1305313245898462e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 1.4189399053533957e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 6.316706653262069e-06, "train/grad_039_lr1.2e+01_wd1.0e+00": 1.5101590398189354e-05, "train/grad_040_lr1.4e+01_wd1.0e+00": 4.8353247352255045e-06, "train/grad_041_lr1.6e+01_wd1.0e+00": 1.1571599985505062e-05, "train/grad_042_lr1.9e+01_wd1.0e+00": 2.200537529872377e-06, "train/grad_043_lr2.2e+01_wd1.0e+00": 1.620679275459126e-06, "train/grad_044_lr2.6e+01_wd1.0e+00": 1.7373463541649252e-06, "train/grad_045_lr3.1e+01_wd1.0e+00": 8.969879702237512e-08, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0005277975471908364, "train/grad_047_lr4.3e+01_wd1.0e+00": 1.6423321016517883e-06, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0009638181096708335, "validation/loss_000_lr2.0e-02_wd1.0e+00": 0.7078474164009094, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.5724486708641052, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.39551615715026855, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.276757150888443, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.2035137265920639, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.14116530120372772, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.09573913365602493, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.07037220895290375, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.05711395666003227, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.050375375896692276, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.046267058700323105, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.04236694052815437, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.039746325463056564, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.03695789724588394, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.035146862268447876, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.03377079218626022, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.03233044221997261, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.031113095581531525, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.030052559450268745, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.02928546816110611, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.028959909453988075, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.028802920132875443, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.02916177362203598, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.029996229335665703, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.03097619116306305, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.03242666274309158, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.03359010070562363, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.03447580337524414, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.03548162057995796, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.03658907860517502, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.037220653146505356, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.0381166897714138, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.03831956163048744, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.039531830698251724, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.03942041099071503, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.046920422464609146, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.06224836781620979, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.07341521233320236, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.12397737056016922, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.14251866936683655, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.18033282458782196, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.2052312046289444, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.39938846230506897, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.46622148156166077, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.49528950452804565, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.6075227856636047, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.8413094878196716, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.997890055179596, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.8160403370857239, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.9107142857142857, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.9221230158730159, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.9399801587301587, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.957093253968254, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9682539682539683, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9771825396825397, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9813988095238095, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9836309523809523, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.984375, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9868551587301587, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9871031746031746, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9878472222222222, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9878472222222222, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9878472222222222, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9880952380952381, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9890873015873016, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9895833333333334, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9898313492063492, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9893353174603174, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9900793650793651, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9900793650793651, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9908234126984127, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9880952380952381, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9918154761904762, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9895833333333334, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9873511904761905, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9885912698412699, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9888392857142857, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9871031746031746, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9875992063492064, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9853670634920635, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9861111111111112, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.902891434344684, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.917356528883427, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.9381226795261893, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.9553238235636874, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.9672868511275226, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9762489915971285, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9800349299977006, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9821942031947417, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9830034705192803, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.985416491832184, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9856426021868477, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9864456828367067, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9860732040208313, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9860758729818835, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9859871940397068, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9864713626651477, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9866544581203814, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9875050285062927, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9884173972874425, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9885243584103921, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9878424903810286, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9888999097256755, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9890846755168933, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9896666555864793, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9897090806133856, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9893497054832902, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9891700468976949, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9893943053245116, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9896308980195665, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9892670056158567, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9892120165430253, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9892120165430253, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9898466353868083, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9895396591656264, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9904787354707976, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9910555649604573, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.987463675683437, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9882271761411946, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9881079263048841, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9869569902782949, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9905658351735495, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9885502162324322, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9854154261933227, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9864748116098558, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9872561595233018, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9851544159209719, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9865497656480733, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9843922924477287, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9845308536475509, "id_best": 35, "lr_best": 0.0018, "wd_best": 0.05, "train/loss_best": 4.162067547440529e-05, "validation/loss_best": 0.046920422464609146, "validation/acc_best": 0.9923115079365079, "validation/f1_best": 0.9910555649604573} +{"epoch": 16, "train/lr": 3.870405029380728e-05, "train/loss": 0.06289739357307553, "train/grad": 0.03835463617928326, "train/loss_000_lr2.0e-02_wd1.0e+00": 0.705423173904419, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.570262930393219, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.3948627984523773, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.2788886284828186, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.20792204409837722, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.14771516472101212, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.10407483257353306, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.08008682547137141, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.0674785986635834, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.06073391932994127, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.05640691203996539, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.05162611305713653, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.04799932459369302, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.04376521847210824, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.040268539004027847, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.037218198962509635, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.03375292516313493, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.0300068673864007, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.026105123003944755, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.02279954366385937, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.019031782476231454, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.015468019219115376, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.011905063027516007, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.008583818525075913, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.005840154020115733, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.0035265342611819506, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.002241991739720106, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0015160415973514319, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0009279452357441187, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.000567288463935256, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.00038782427087426187, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.00029152953997254374, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00020236526615917682, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00014931711368262768, "train/loss_034_lr5.1e+00_wd1.0e+00": 9.60431806743145e-05, "train/loss_035_lr6.0e+00_wd1.0e+00": 4.489956423640251e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.9749710336327553e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.1315932497382163e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 3.315350040793419e-06, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.2858174741268159e-05, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.8440932035446166e-06, "train/loss_041_lr1.6e+01_wd1.0e+00": 6.386972963809967e-06, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.7150212079286575e-06, "train/loss_043_lr2.2e+01_wd1.0e+00": 8.173007518053055e-07, "train/loss_044_lr2.6e+01_wd1.0e+00": 9.918306022882462e-06, "train/loss_045_lr3.1e+01_wd1.0e+00": 1.9459985196590423e-07, "train/loss_046_lr3.6e+01_wd1.0e+00": 5.405201576650143e-05, "train/loss_047_lr4.3e+01_wd1.0e+00": 1.1175870895385742e-10, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.0036718752328306434, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.015764860161580144, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.014622635492123663, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.012787778247147798, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.011072894344106316, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.00978418529033661, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.008510668713133782, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.007218734000343829, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.0063979342859238385, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.00606971492874436, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.0059303274104604495, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.0058348450169432905, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.00570844468777068, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.005579033424146474, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.00540026584174484, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.005220536147826351, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.005045419552188833, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.004827776216843631, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.004572064764215611, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.004284885859960923, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.004018247316853376, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0036910353727580515, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0033514454319083595, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0029485970511450433, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0024585685455531347, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0019105272054002853, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0013038501425035065, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0008913073521762271, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0006236817378703563, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.00038774417553213427, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00023526587816377286, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00015796431036051216, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0001169787203116357, "train/grad_032_lr3.7e+00_wd1.0e+00": 7.906174349955109e-05, "train/grad_033_lr4.3e+00_wd1.0e+00": 6.079600308851241e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 4.298195027104157e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 2.637203679050515e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 1.830156330811572e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 1.2034645928671871e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 5.479599440854323e-06, "train/grad_039_lr1.2e+01_wd1.0e+00": 1.8005133378846312e-05, "train/grad_040_lr1.4e+01_wd1.0e+00": 5.737524567333541e-06, "train/grad_041_lr1.6e+01_wd1.0e+00": 1.1203664596061936e-05, "train/grad_042_lr1.9e+01_wd1.0e+00": 3.879690254640386e-06, "train/grad_043_lr2.2e+01_wd1.0e+00": 2.089458581762662e-06, "train/grad_044_lr2.6e+01_wd1.0e+00": 2.899169397711301e-05, "train/grad_045_lr3.1e+01_wd1.0e+00": 7.575651344426396e-07, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0003350927194040203, "train/grad_047_lr4.3e+01_wd1.0e+00": 5.209870338037728e-07, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.00026096208072963587, "validation/loss_000_lr2.0e-02_wd1.0e+00": 0.6951501965522766, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.5604115724563599, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.3851926326751709, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.26909810304641724, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.19775861501693726, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.1368960738182068, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.09267527610063553, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.06883109360933304, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.05627833679318428, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.04977519437670708, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.04589981958270073, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.04208742827177048, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.03953256830573082, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.03682640194892883, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.0350184328854084, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.033562976866960526, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.03239383175969124, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.031214844435453415, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.030343638733029366, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.0296788327395916, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.029274489730596542, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.0292750783264637, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.029713468626141548, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.030514193698763847, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.03158439323306084, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.03290867432951927, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.03412118926644325, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.03503777086734772, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.03588135167956352, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.03686937317252159, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.03736694157123566, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.0382913276553154, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.038489773869514465, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.039679598063230515, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.03952920064330101, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.046628743410110474, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.061873216181993484, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.07277388870716095, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.12244544178247452, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.1404738575220108, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.17764094471931458, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.20178471505641937, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.39085185527801514, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.4545629024505615, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.4755858778953552, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.5872846245765686, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.7615380883216858, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.9590377807617188, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.7378755807876587, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.9119543650793651, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.9241071428571429, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.941468253968254, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.9580853174603174, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9692460317460317, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9776785714285714, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9813988095238095, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9838789682539683, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9846230158730159, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9868551587301587, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9878472222222222, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9878472222222222, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9878472222222222, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.988343253968254, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9890873015873016, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9893353174603174, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9893353174603174, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9900793650793651, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9895833333333334, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9898313492063492, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9920634920634921, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9880952380952381, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9918154761904762, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9895833333333334, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9875992063492064, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9885912698412699, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9895833333333334, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9873511904761905, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9878472222222222, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9856150793650794, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.986359126984127, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.9046531875862838, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.9195317211498713, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.9397743364346093, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.9567207569158785, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.9683672319886153, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9766081164888964, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9800349299977006, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9824678231717514, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9831816659097729, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9854146688993305, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9858682795481122, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9862229301650304, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9862536264254798, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9855435114000406, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9857238731391901, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9859388862549545, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9861192070936807, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9865757043854422, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9877077858225688, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9878099625690409, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.987719660086975, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9884215475604252, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9881389788677537, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9885465547729614, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9891723212061639, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9895323947659931, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9890945706402023, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9891400283901304, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9890348881593376, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9890335517945718, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9890319411606392, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9892120165430253, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9900266535568856, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9895396591656264, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9902621722305576, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9908751840299418, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9872846071107565, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9882271761411946, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9881079263048841, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9869569902782949, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9905658351735495, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9885502162324322, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.985684374968223, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9864748116098558, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9882070189565711, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9856985625242318, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.987174873227669, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9844949467926492, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9852674176083014, "id_best": 35, "lr_best": 0.0018, "wd_best": 0.05, "train/loss_best": 4.489956423640251e-05, "validation/loss_best": 0.046628743410110474, "validation/acc_best": 0.9920634920634921, "validation/f1_best": 0.9908751840299418} +{"epoch": 17, "train/lr": 2.030750443206766e-05, "train/loss": 0.06190460694953799, "train/grad": 0.037042711311951276, "train/loss_000_lr2.0e-02_wd1.0e+00": 0.6963702726364136, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.5619714832305909, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.3882812988758087, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.2741180104017258, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.20409009039402007, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.14445557318627833, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.10122905392199755, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.07806187435984611, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.065803451417014, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.059139808220788834, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.05497413894161582, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.05047056454233825, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.04710125514306128, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.043220785474404694, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.04001070429570973, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.03719543975777924, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.03393612057901919, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.03037891136482358, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.02671935115940869, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.02351556047797203, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.019750992292538284, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.0161390598397702, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.012334570651873947, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.008734031887724996, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.005756711699068546, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.0033787346445024015, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0021299275942146777, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.001443233722820878, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0008835126087069511, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0005427893903106451, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.000381677495315671, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.000284899165853858, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0001932375505566597, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00013765242882072927, "train/loss_034_lr5.1e+00_wd1.0e+00": 9.058994241058827e-05, "train/loss_035_lr6.0e+00_wd1.0e+00": 3.8751326501369475e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.116149291396141e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.3587810099124908e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 3.417599946260452e-06, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.2258095666766167e-05, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.0857155323028564e-06, "train/loss_041_lr1.6e+01_wd1.0e+00": 6.984481588006019e-06, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.6456004232168198e-06, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.6085803508758545e-07, "train/loss_044_lr2.6e+01_wd1.0e+00": 1.8626451492309571e-10, "train/loss_045_lr3.1e+01_wd1.0e+00": 1.9812025129795074e-07, "train/loss_046_lr3.6e+01_wd1.0e+00": 1.9701197743415832e-07, "train/loss_047_lr4.3e+01_wd1.0e+00": 1.210719347000122e-10, "train/loss_048_lr5.0e+01_wd1.0e+00": 6.332993507385254e-10, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.015216516861692071, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.014112949459813536, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01235941962338984, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.010706485405098647, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.009412985511589795, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.008123373419512064, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.006874851727625355, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.006158338042441755, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.00588217367825564, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.0057622162566985935, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.005686642063083127, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.005570294575882144, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.005457845746714156, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.005297743993287441, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.005145943328097928, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.004993490051128901, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.004809010100725573, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.004595687566470588, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.004339112463203492, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.004089974374364829, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.003768461966101313, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.003419815202942118, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0029791114873660264, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0024338707680726657, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0018373998953757108, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0012251963097151019, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0008273167064908193, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0005784123964986066, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.00036272766474212406, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00022552475932570814, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0001535717322894925, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00011442886705026467, "train/grad_032_lr3.7e+00_wd1.0e+00": 7.888324154009752e-05, "train/grad_033_lr4.3e+00_wd1.0e+00": 6.038044863146297e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 4.250351362600213e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 2.5368457689509683e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 1.7424052106073672e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 1.3574169638332079e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 5.091811046330625e-06, "train/grad_039_lr1.2e+01_wd1.0e+00": 1.4780137916145937e-05, "train/grad_040_lr1.4e+01_wd1.0e+00": 4.748415643637638e-06, "train/grad_041_lr1.6e+01_wd1.0e+00": 1.6042497569333154e-05, "train/grad_042_lr1.9e+01_wd1.0e+00": 3.6805539720051418e-06, "train/grad_043_lr2.2e+01_wd1.0e+00": 9.072873276776469e-07, "train/grad_044_lr2.6e+01_wd1.0e+00": 5.152315437767117e-10, "train/grad_045_lr3.1e+01_wd1.0e+00": 5.610259440485531e-07, "train/grad_046_lr3.6e+01_wd1.0e+00": 6.370846782616754e-07, "train/grad_047_lr4.3e+01_wd1.0e+00": 1.390650930213612e-06, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.00023074020844165776, "validation/loss_000_lr2.0e-02_wd1.0e+00": 0.6883645057678223, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.554053783416748, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.37980300188064575, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.2650471329689026, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.19483555853366852, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.1347726434469223, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.09127860516309738, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.06814207881689072, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.055905669927597046, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.04953170195221901, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.04571359604597092, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.04192192479968071, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.039366427809000015, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.036685604602098465, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.03476806357502937, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.03341064229607582, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.032085251063108444, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.030925147235393524, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.02988579496741295, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.029350686818361282, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.028971266001462936, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.029018642380833626, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.02951759845018387, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.030325185507535934, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.031351279467344284, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.032828181982040405, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.034083858132362366, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.03509543091058731, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.03592108190059662, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.03698006272315979, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.037390511482954025, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.038236651569604874, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.03845669701695442, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.03964177891612053, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.039469052106142044, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.0466458834707737, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.061733804643154144, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.07244469970464706, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.12195220589637756, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.13941353559494019, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.17615684866905212, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.19919584691524506, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.3861507773399353, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.447377473115921, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.4669404625892639, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.5782435536384583, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.7525668740272522, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.9417871236801147, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.7222577929496765, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.9144345238095238, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.9253472222222222, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.9429563492063492, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.9583333333333334, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9697420634920635, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9776785714285714, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9811507936507936, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9838789682539683, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9846230158730159, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9871031746031746, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9880952380952381, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.988343253968254, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.988343253968254, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.988343253968254, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9890873015873016, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9888392857142857, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9893353174603174, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9898313492063492, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9900793650793651, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9900793650793651, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9903273809523809, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9920634920634921, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9880952380952381, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9918154761904762, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9895833333333334, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9875992063492064, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9885912698412699, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9895833333333334, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9873511904761905, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9875992063492064, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9856150793650794, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.986359126984127, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.9067459654534209, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.9207985066272654, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.9412631218855525, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.9567114273031466, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.9688129777900084, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9766081164888964, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9799435554237778, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9824678231717514, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9831816659097729, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9855932561819039, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9860469103259742, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9860480317624015, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9860732040208313, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9862548785311533, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9864338648741354, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9868280444128068, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9868308190293144, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9875134285469296, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9873389346342253, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9875793327140402, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9881174671981638, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9885849834988266, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9885849834988266, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9889933364783661, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9893528026919269, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9895323947659931, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9890945706402023, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9889127582570985, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9890348881593376, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9890872673690186, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9890319411606392, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9892120165430253, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9900266535568856, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9895396591656264, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9902621722305576, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.990776223801854, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9872846071107565, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9882271761411946, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9883777269389954, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9869569902782949, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9905658351735495, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9885502162324322, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.985684374968223, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9864305680633405, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9882070189565711, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9856985625242318, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9869958217201273, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9844949467926492, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9851132946563759, "id_best": 35, "lr_best": 0.0018, "wd_best": 0.05, "train/loss_best": 3.8751326501369475e-05, "validation/loss_best": 0.0466458834707737, "validation/acc_best": 0.9920634920634921, "validation/f1_best": 0.990776223801854} +{"epoch": 18, "train/lr": 7.5829098008590064e-06, "train/loss": 0.059370497167110445, "train/grad": 0.03686906044371426, "train/loss_000_lr2.0e-02_wd1.0e+00": 0.6830331611633301, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.549985589981079, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.37810485243797304, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.26534914553165434, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.1960969913005829, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.13733432054519654, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.09529278255999088, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.07305087216198444, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.06127139450982213, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.05487804535776377, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.05083409052342176, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.04644197454676032, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.04323996492661536, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.039481410859152674, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.036381643554195764, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.033730082316324114, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.030658458843827247, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.02727996315807104, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.023780692424625157, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.020761857712641357, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.017275530081242323, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.013932971553876995, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.010520726684480905, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.00733324958011508, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.0048262056708335875, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.002855055658146739, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0018248532246798277, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0012586327269673348, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0007926800660789013, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0004943474661558867, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.00034072279930114747, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0002519104816019535, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0001718444749712944, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00012809794396162033, "train/loss_034_lr5.1e+00_wd1.0e+00": 7.798202335834503e-05, "train/loss_035_lr6.0e+00_wd1.0e+00": 3.731976263225079e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.4036912471055985e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.0106321424245834e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 3.5728886723518373e-06, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.0952800512313842e-05, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.5478674322366714e-06, "train/loss_041_lr1.6e+01_wd1.0e+00": 3.979587927460671e-06, "train/loss_042_lr1.9e+01_wd1.0e+00": 7.393863052129745e-07, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.7595087885856627e-08, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.3283064365386963e-10, "train/loss_045_lr3.1e+01_wd1.0e+00": 4.200264811515808e-09, "train/loss_046_lr3.6e+01_wd1.0e+00": 9.611248970031739e-09, "train/loss_047_lr4.3e+01_wd1.0e+00": 9.313225746154786e-11, "train/loss_048_lr5.0e+01_wd1.0e+00": 1.8067657947540283e-09, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.0154367996705696, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.014309231601655483, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.012503822017461062, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.010834066320676357, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.009555957987904548, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.008231739574111998, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.00683625646866858, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.006010931055061519, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.005704659082693979, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.0055769632285227995, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.005492578382254578, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.005376383058028296, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.005270982170768548, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.005114180283562746, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.004965117669198662, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.004820889437978621, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.004640953161288053, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.004427589184633689, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.004172971126390621, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.003937061771575827, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.003630519400248886, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0032893154129124014, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0028549395602749427, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0023270712871453726, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0017652265106880805, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0011791173438905388, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0007955463460166356, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0005622667859643116, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0003525719222943735, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00021921508319564965, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00015074556089530233, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00011237709755732794, "train/grad_032_lr3.7e+00_wd1.0e+00": 7.786393346464138e-05, "train/grad_033_lr4.3e+00_wd1.0e+00": 6.093023421612997e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 4.1148225743938834e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 2.6430368667433867e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 1.841398359336166e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 1.1448412419555395e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 6.1247668774306275e-06, "train/grad_039_lr1.2e+01_wd1.0e+00": 1.6382110360339218e-05, "train/grad_040_lr1.4e+01_wd1.0e+00": 5.9537487936726015e-06, "train/grad_041_lr1.6e+01_wd1.0e+00": 1.1809427306861595e-05, "train/grad_042_lr1.9e+01_wd1.0e+00": 3.203840370904246e-06, "train/grad_043_lr2.2e+01_wd1.0e+00": 9.772835152469819e-07, "train/grad_044_lr2.6e+01_wd1.0e+00": 9.045451050479135e-10, "train/grad_045_lr3.1e+01_wd1.0e+00": 5.595721876791127e-07, "train/grad_046_lr3.6e+01_wd1.0e+00": 1.2421946290368487e-06, "train/grad_047_lr4.3e+01_wd1.0e+00": 5.06833151615169e-07, "train/grad_048_lr5.0e+01_wd1.0e+00": 6.52199408375269e-09, "validation/loss_000_lr2.0e-02_wd1.0e+00": 0.6859580874443054, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.5516873002052307, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.3778447210788727, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.2636871933937073, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.19374723732471466, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.133832648396492, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.09062067419290543, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.06780954450368881, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.05574790760874748, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.0494367890059948, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.04561138153076172, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.04176700860261917, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.03924822062253952, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.03655153512954712, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.03473801165819168, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.03332207724452019, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.03203357756137848, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.03080929070711136, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.02990649826824665, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.029355093836784363, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.0289682075381279, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.029087543487548828, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.029582040384411812, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.03040354698896408, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.03147144615650177, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.032930873334407806, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.03415828198194504, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.035034604370594025, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.035885732620954514, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.03685382008552551, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.03729386255145073, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.03830643370747566, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.038490571081638336, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.0396733395755291, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.03946039453148842, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.04664141684770584, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.06153477728366852, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.07216241955757141, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.12163027375936508, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.13875456154346466, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.17587055265903473, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.19870813190937042, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.38487309217453003, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.4440779983997345, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.4658566117286682, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.5745438933372498, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.7437102198600769, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.9345310926437378, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.7143110036849976, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.9136904761904762, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.9243551587301587, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.9444444444444444, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.9595734126984127, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9697420634920635, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9776785714285714, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9811507936507936, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9838789682539683, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9848710317460317, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9871031746031746, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9880952380952381, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.988343253968254, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.988343253968254, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9885912698412699, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9890873015873016, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9888392857142857, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9890873015873016, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9898313492063492, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9898313492063492, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9900793650793651, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9905753968253969, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.988343253968254, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9918154761904762, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9895833333333334, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9875992063492064, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9885912698412699, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9893353174603174, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9875992063492064, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9875992063492064, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9856150793650794, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9866071428571429, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.905973954783301, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.9200506251026441, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.9427298912790308, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.9583388519467881, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.9692682203041185, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9766953916931277, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9797665028606518, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9824678231717514, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9835555977143555, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9855932561819039, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9860469103259742, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9864015609428927, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9860732040208313, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9862548785311533, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9864338648741354, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9868280444128068, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.987011223004391, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9875134285469296, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9873389346342253, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.987399025459882, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9881174671981638, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9880529608757737, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9887697492900445, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9891724365184089, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9891716765656235, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9888119802494963, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.988765010152389, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9889127582570985, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9888545618340142, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9890872673690186, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9890319411606392, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9892120165430253, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9900266535568856, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9895396591656264, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9902621722305576, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9910555649604573, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9872846071107565, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9882271761411946, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9881997250397744, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9875043356532265, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9905658351735495, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9885502162324322, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.985684374968223, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9864305680633405, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9876593861151525, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.985878308359716, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9869958217201273, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9844949467926492, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9853811135212395, "id_best": 35, "lr_best": 0.0018, "wd_best": 0.05, "train/loss_best": 3.731976263225079e-05, "validation/loss_best": 0.04664141684770584, "validation/acc_best": 0.9923115079365079, "validation/f1_best": 0.9910555649604573} +{"epoch": 19, "train/lr": 1.0867618231465446e-06, "train/loss": 0.0597082632035017, "train/grad": 0.03669589905068278, "train/loss_000_lr2.0e-02_wd1.0e+00": 0.6843694496154785, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.5506979703903199, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.3785544753074646, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.26604374408721926, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.19732783257961273, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.13865597993135453, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.09662266425788403, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.07447650458663702, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.06261720137670636, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.05618859418667853, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.052108708526939156, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.04763770326040685, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.04427020804025233, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.040331019461154936, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.03705335071310401, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.03423012850806117, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.030897568883374332, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.027382404301315545, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.02370964246802032, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.02062848934903741, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.017123874677345156, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.013767625046893954, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.010387667324393987, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.00732486117631197, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.00486593103967607, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.00291457237675786, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0018693353328853846, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0012875199969857932, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.000808147406205535, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0004983415827155113, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0003421318531036377, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.00025133155286312104, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00017369150184094905, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0001245530880987644, "train/loss_034_lr5.1e+00_wd1.0e+00": 7.848430424928666e-05, "train/loss_035_lr6.0e+00_wd1.0e+00": 3.5799313336610795e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.577412709593773e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 9.65026207268238e-06, "train/loss_038_lr9.8e+00_wd1.0e+00": 3.6704819649457932e-06, "train/loss_039_lr1.2e+01_wd1.0e+00": 8.83287750184536e-06, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.015652135014534e-06, "train/loss_041_lr1.6e+01_wd1.0e+00": 6.00677914917469e-06, "train/loss_042_lr1.9e+01_wd1.0e+00": 6.444938480854034e-07, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.378784120082855e-07, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.7008354663848875e-10, "train/loss_045_lr3.1e+01_wd1.0e+00": 4.712492227554321e-09, "train/loss_046_lr3.6e+01_wd1.0e+00": 4.124641418457031e-07, "train/loss_047_lr4.3e+01_wd1.0e+00": 1.9318424165248871e-07, "train/loss_048_lr5.0e+01_wd1.0e+00": 2.1420419216156007e-10, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.01514101106673479, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.014025046369060873, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.012244434067979456, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.010583757616113871, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.009318096390925347, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.008050382493529469, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.0068029893224593255, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.006101839290931821, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.0058455987117486076, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.005741984828491695, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.005671920251334086, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.005569704962254036, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.0054615519408253025, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.005310000329045579, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.005152836622728501, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.0050077082926873115, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.004810054885456339, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.004577834068768425, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.0043044839675712866, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.004047558148158714, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0037169330194592475, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.003356801696209004, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0029070818446052726, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.002348860882120789, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0017565223338169744, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.001159010203409707, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0007761442494120274, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0005434052074724605, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.00034361546497166276, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00021318776927728322, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00014694616497308742, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00010891983311239528, "train/grad_032_lr3.7e+00_wd1.0e+00": 7.604053178738467e-05, "train/grad_033_lr4.3e+00_wd1.0e+00": 5.963667991863986e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 4.127966908754388e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 2.660630878544623e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 1.7992107566993652e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 1.2405905899099054e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 5.26321664746931e-06, "train/grad_039_lr1.2e+01_wd1.0e+00": 1.6729026901598064e-05, "train/grad_040_lr1.4e+01_wd1.0e+00": 5.769113128366396e-06, "train/grad_041_lr1.6e+01_wd1.0e+00": 1.4694993172862572e-05, "train/grad_042_lr1.9e+01_wd1.0e+00": 2.6501249485129366e-06, "train/grad_043_lr2.2e+01_wd1.0e+00": 1.2017366861711832e-06, "train/grad_044_lr2.6e+01_wd1.0e+00": 1.5921590251379767e-09, "train/grad_045_lr3.1e+01_wd1.0e+00": 5.592118805757829e-07, "train/grad_046_lr3.6e+01_wd1.0e+00": 1.2815153285144457e-06, "train/grad_047_lr4.3e+01_wd1.0e+00": 5.064324792728874e-07, "train/grad_048_lr5.0e+01_wd1.0e+00": 2.7903038125833276e-09, "validation/loss_000_lr2.0e-02_wd1.0e+00": 0.6855393052101135, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.5512863993644714, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.37754353880882263, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.26351290941238403, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.19361913204193115, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.13367338478565216, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.09048916399478912, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.06778322905302048, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.055714283138513565, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.04940582066774368, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.045558344572782516, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.041752953082323074, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.03926004841923714, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.03654393181204796, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.03473157808184624, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.03329988569021225, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.03200680762529373, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.03081529028713703, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.029889393597841263, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.029270725324749947, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.028996847569942474, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.028978295624256134, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.029593564569950104, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.030301090329885483, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.031458403915166855, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.0329003669321537, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.03413589298725128, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.035043977200984955, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.03593001887202263, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.03687655180692673, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.03733730688691139, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.038295235484838486, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.03842848166823387, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.03969542309641838, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.03944867104291916, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.04662461578845978, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.06153904274106026, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.07213544845581055, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.12165704369544983, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.13878513872623444, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.17573115229606628, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.1983037143945694, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.3847491145133972, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.4437790811061859, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.46506065130233765, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.5754733681678772, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.7434788346290588, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.9330863356590271, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.7141889333724976, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.9136904761904762, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.9241071428571429, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.9444444444444444, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.9593253968253969, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9694940476190477, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9776785714285714, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9811507936507936, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9838789682539683, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9848710317460317, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9871031746031746, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9880952380952381, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.988343253968254, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.988343253968254, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9885912698412699, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9890873015873016, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9890873015873016, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9893353174603174, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9895833333333334, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9898313492063492, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9900793650793651, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9903273809523809, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9880952380952381, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9918154761904762, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9895833333333334, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9875992063492064, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9885912698412699, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9893353174603174, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9873511904761905, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9875992063492064, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9856150793650794, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.986359126984127, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.9061149964773921, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.9198165121641094, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.9427298912790308, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.9580675756103048, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.9689996982738461, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9766953916931277, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9797665028606518, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9824678231717514, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9835555977143555, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9855932561819039, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9860469103259742, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9864015609428927, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9860732040208313, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9862548785311533, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9864338648741354, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9868280444128068, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.987011223004391, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9875134285469296, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9875617175030408, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9875793327140402, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9877083357410683, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9880529608757737, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9887697492900445, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9889927329922347, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9891716765656235, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9888119802494963, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.988765010152389, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9889127582570985, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9888545618340142, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9890872673690186, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9890319411606392, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9892120165430253, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9900266535568856, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9895396591656264, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9902621722305576, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9910555649604573, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.987463675683437, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9882271761411946, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9881997250397744, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9869569902782949, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9905658351735495, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9885502162324322, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.985684374968223, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9864305680633405, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9876593861151525, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9856985625242318, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9869958217201273, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9844949467926492, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9851132946563759, "id_best": 35, "lr_best": 0.0018, "wd_best": 0.05, "train/loss_best": 3.5799313336610795e-05, "validation/loss_best": 0.04662461578845978, "validation/acc_best": 0.9923115079365079, "validation/f1_best": 0.9910555649604573} diff --git a/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/config.yaml b/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2a4e49bfcd59849ed19f43d6f7452a24b554357a --- /dev/null +++ b/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/config.yaml @@ -0,0 +1,96 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_probe +remote_root: null +notes: data scaling experiment n1600_1; eval v2 (nsd_cococlip patch attn) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: true + norm: true +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn +model: flat_mae +representation: patch +classifier: attn +dataset: nsd_cococlip +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn +remote_dir: null diff --git a/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/eval_log.json b/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/eval_log.json new file mode 100644 index 0000000000000000000000000000000000000000..929124468ff3a3ae89f1837125c84db4d694d33c --- /dev/null +++ b/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/eval_log.json @@ -0,0 +1 @@ +{"eval/epoch": 15, "eval/id_best": 18, "eval/lr_best": 0.00011399999999999999, "eval/wd_best": 0.05, "eval/train/loss": 2.0526070594787598, "eval/train/acc": 0.38455391991149085, "eval/train/acc_std": 0.0025284902360519874, "eval/train/f1": 0.3276738171611527, "eval/train/f1_std": 0.0026909873548640485, "eval/validation/loss": 2.3856709003448486, "eval/validation/acc": 0.28903654485049834, "eval/validation/acc_std": 0.005672179069362943, "eval/validation/f1": 0.22604240233887776, "eval/validation/f1_std": 0.0052921779632021685, "eval/test/loss": 2.241790533065796, "eval/test/acc": 0.3163265306122449, "eval/test/acc_std": 0.0054211095278469505, "eval/test/f1": 0.24594998726363496, "eval/test/f1_std": 0.005426376457639777, "eval/testid/loss": 2.2627062797546387, "eval/testid/acc": 0.3084634663582032, "eval/testid/acc_std": 0.005788127800688075, "eval/testid/f1": 0.2510760808284826, "eval/testid/f1_std": 0.005474750331600504} diff --git a/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/eval_log_best.json b/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/eval_log_best.json new file mode 100644 index 0000000000000000000000000000000000000000..adf9ddf45325dce9e74156a54909ed65df8d8874 --- /dev/null +++ b/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/eval_log_best.json @@ -0,0 +1 @@ +{"eval/best/epoch": 15, "eval/best/id_best": 18, "eval/best/lr_best": 0.00011399999999999999, "eval/best/wd_best": 0.05, "eval/best/train/loss": 2.0526070594787598, "eval/best/train/acc": 0.38455391991149085, "eval/best/train/acc_std": 0.0025284902360519874, "eval/best/train/f1": 0.3276738171611527, "eval/best/train/f1_std": 0.0026909873548640485, "eval/best/validation/loss": 2.3856709003448486, "eval/best/validation/acc": 0.28903654485049834, "eval/best/validation/acc_std": 0.005672179069362943, "eval/best/validation/f1": 0.22604240233887776, "eval/best/validation/f1_std": 0.0052921779632021685, "eval/best/test/loss": 2.241790533065796, "eval/best/test/acc": 0.3163265306122449, "eval/best/test/acc_std": 0.0054211095278469505, "eval/best/test/f1": 0.24594998726363496, "eval/best/test/f1_std": 0.005426376457639777, "eval/best/testid/loss": 2.2627062797546387, "eval/best/testid/acc": 0.3084634663582032, "eval/best/testid/acc_std": 0.005788127800688075, "eval/best/testid/f1": 0.2510760808284826, "eval/best/testid/f1_std": 0.005474750331600504} diff --git a/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/eval_log_last.json b/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/eval_log_last.json new file mode 100644 index 0000000000000000000000000000000000000000..b2fea5abb659b86b86630f74222bd98fbac42b92 --- /dev/null +++ b/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/eval_log_last.json @@ -0,0 +1 @@ +{"eval/last/epoch": 19, "eval/last/id_best": 18, "eval/last/lr_best": 0.00011399999999999999, "eval/last/wd_best": 0.05, "eval/last/train/loss": 2.0406382083892822, "eval/last/train/acc": 0.3881803374412244, "eval/last/train/acc_std": 0.0025052538677778633, "eval/last/train/f1": 0.3319802645686469, "eval/last/train/f1_std": 0.0026600826457256186, "eval/last/validation/loss": 2.3837547302246094, "eval/last/validation/acc": 0.2870062753783684, "eval/last/validation/acc_std": 0.005549755572089327, "eval/last/validation/f1": 0.2256492492538736, "eval/last/validation/f1_std": 0.005221952905307762, "eval/last/test/loss": 2.241185426712036, "eval/last/test/acc": 0.3181818181818182, "eval/last/test/acc_std": 0.005555482020357875, "eval/last/test/f1": 0.24939410190200365, "eval/last/test/f1_std": 0.005583698677888341, "eval/last/testid/loss": 2.255531072616577, "eval/last/testid/acc": 0.30962020435704646, "eval/last/testid/acc_std": 0.005789843702715705, "eval/last/testid/f1": 0.2520459500024084, "eval/last/testid/f1_std": 0.005430946780908184} diff --git a/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/eval_table.csv b/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..f83766b6bf2709282e2cbdaef74863705ae1761c --- /dev/null +++ b/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/eval_table.csv @@ -0,0 +1,5 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,nsd_cococlip,best,15,0.00011399999999999999,0.05,18,"[0.38, 1.0]",train,2.0526070594787598,0.38455391991149085,0.0025284902360519874,0.3276738171611527,0.0026909873548640485 +flat_mae,patch,attn,nsd_cococlip,best,15,0.00011399999999999999,0.05,18,"[0.38, 1.0]",validation,2.3856709003448486,0.28903654485049834,0.005672179069362943,0.22604240233887776,0.0052921779632021685 +flat_mae,patch,attn,nsd_cococlip,best,15,0.00011399999999999999,0.05,18,"[0.38, 1.0]",test,2.241790533065796,0.3163265306122449,0.0054211095278469505,0.24594998726363496,0.005426376457639777 +flat_mae,patch,attn,nsd_cococlip,best,15,0.00011399999999999999,0.05,18,"[0.38, 1.0]",testid,2.2627062797546387,0.3084634663582032,0.005788127800688075,0.2510760808284826,0.005474750331600504 diff --git a/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/eval_table_best.csv b/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/eval_table_best.csv new file mode 100644 index 0000000000000000000000000000000000000000..f83766b6bf2709282e2cbdaef74863705ae1761c --- /dev/null +++ b/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/eval_table_best.csv @@ -0,0 +1,5 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,nsd_cococlip,best,15,0.00011399999999999999,0.05,18,"[0.38, 1.0]",train,2.0526070594787598,0.38455391991149085,0.0025284902360519874,0.3276738171611527,0.0026909873548640485 +flat_mae,patch,attn,nsd_cococlip,best,15,0.00011399999999999999,0.05,18,"[0.38, 1.0]",validation,2.3856709003448486,0.28903654485049834,0.005672179069362943,0.22604240233887776,0.0052921779632021685 +flat_mae,patch,attn,nsd_cococlip,best,15,0.00011399999999999999,0.05,18,"[0.38, 1.0]",test,2.241790533065796,0.3163265306122449,0.0054211095278469505,0.24594998726363496,0.005426376457639777 +flat_mae,patch,attn,nsd_cococlip,best,15,0.00011399999999999999,0.05,18,"[0.38, 1.0]",testid,2.2627062797546387,0.3084634663582032,0.005788127800688075,0.2510760808284826,0.005474750331600504 diff --git a/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/eval_table_last.csv b/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/eval_table_last.csv new file mode 100644 index 0000000000000000000000000000000000000000..fbee6be1a99dacdec88d073b1c8b0ef940bee8b2 --- /dev/null +++ b/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/eval_table_last.csv @@ -0,0 +1,5 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,nsd_cococlip,last,19,0.00011399999999999999,0.05,18,"[0.38, 1.0]",train,2.0406382083892822,0.3881803374412244,0.0025052538677778633,0.3319802645686469,0.0026600826457256186 +flat_mae,patch,attn,nsd_cococlip,last,19,0.00011399999999999999,0.05,18,"[0.38, 1.0]",validation,2.3837547302246094,0.2870062753783684,0.005549755572089327,0.2256492492538736,0.005221952905307762 +flat_mae,patch,attn,nsd_cococlip,last,19,0.00011399999999999999,0.05,18,"[0.38, 1.0]",test,2.241185426712036,0.3181818181818182,0.005555482020357875,0.24939410190200365,0.005583698677888341 +flat_mae,patch,attn,nsd_cococlip,last,19,0.00011399999999999999,0.05,18,"[0.38, 1.0]",testid,2.255531072616577,0.30962020435704646,0.005789843702715705,0.2520459500024084,0.005430946780908184 diff --git a/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/log.txt b/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..b36a36297f78f5e48d29395bd1e5cdccfc256827 --- /dev/null +++ b/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/log.txt @@ -0,0 +1,963 @@ +fMRI foundation model probe eval +version: 0.1.dev65+g4003a1397 +sha: 6c01b606db98add5848cecd23e5d599250c0bf86, status: clean, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-24 19:52:25 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_probe +remote_root: null +notes: data scaling experiment n1600_1; eval v2 (nsd_cococlip patch attn) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: true + norm: true +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn +model: flat_mae +representation: patch +classifier: attn +dataset: nsd_cococlip +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: nsd_cococlip (flat) +train (n=32539): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'run', 'trial_id', 'nsd_id', 'category_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 32539 +}), + labels=[ 3 4 5 6 10 11 17 18 19 20 22 23 25 30 31 33 36 37 38 53 55 59 61 74], + counts=[1286 1180 1639 1868 834 824 1026 1042 913 1853 1503 2092 1001 1410 + 794 1241 1904 1872 2267 1428 889 904 1447 1322] +) + +validation (n=5418): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'run', 'trial_id', 'nsd_id', 'category_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 5418 +}), + labels=[ 3 4 5 6 10 11 17 18 19 20 22 23 25 30 31 33 36 37 38 53 55 59 61 74], + counts=[197 161 276 345 126 142 143 185 112 295 285 387 169 250 159 193 316 334 + 343 215 172 141 226 246] +) + +test (n=5390): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'run', 'trial_id', 'nsd_id', 'category_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 5390 +}), + labels=[ 3 4 5 6 10 11 17 18 19 20 22 23 25 30 31 33 36 37 38 53 55 59 61 74], + counts=[202 172 274 298 144 180 134 182 186 293 218 343 165 185 140 177 346 333 + 345 271 165 140 251 246] +) + +testid (n=5187): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'run', 'trial_id', 'nsd_id', 'category_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 5187 +}), + labels=[ 3 4 5 6 10 11 17 18 19 20 22 23 25 30 31 33 36 37 38 53 55 59 61 74], + counts=[197 159 267 273 123 153 175 184 139 310 215 386 153 230 118 192 330 306 + 349 223 143 127 249 186] +) + +running backbone on example batch to get embedding dim +embedding feature dim (patch): 768 +initializing sweep of classifier heads +classifiers: +ModuleList( + (0-48): 49 x AttnPoolClassifier( + (kv): Linear(in_features=768, out_features=1536, bias=True) + (linear): Linear(in_features=768, out_features=24, bias=True) + ) +) +classifier params (train): 58.8M (58.8M) +setting up optimizer +total batch size: 128 = 64 bs per gpu x 2 accum +lr: 3.00e-04 +full schedule: epochs = 20 (steps = 4000) (decay = True) +warmup: epochs = 5 (steps = 1000) +start training for 20 epochs +train: [0] [ 0/400] eta: 0:22:36 lr: nan time: 3.3918 data: 2.8077 max mem: 21740 +train: [0] [ 20/400] eta: 0:03:50 lr: 0.000003 loss: 3.1699 (3.1749) grad: 0.1548 (0.1570) time: 0.4678 data: 0.0040 max mem: 22448 +train: [0] [ 40/400] eta: 0:03:10 lr: 0.000006 loss: 3.1608 (3.1629) grad: 0.1530 (0.1574) time: 0.4453 data: 0.0048 max mem: 22448 +train: [0] [ 60/400] eta: 0:02:50 lr: 0.000009 loss: 3.1522 (3.1651) grad: 0.1518 (0.1583) time: 0.4454 data: 0.0048 max mem: 22448 +train: [0] [ 80/400] eta: 0:02:35 lr: 0.000012 loss: 3.1560 (3.1642) grad: 0.1544 (0.1575) time: 0.4434 data: 0.0045 max mem: 22448 +train: [0] [100/400] eta: 0:02:23 lr: 0.000015 loss: 3.1602 (3.1639) grad: 0.1505 (0.1562) time: 0.4413 data: 0.0048 max mem: 22448 +train: [0] [120/400] eta: 0:02:12 lr: 0.000018 loss: 3.1541 (3.1612) grad: 0.1424 (0.1543) time: 0.4498 data: 0.0046 max mem: 22448 +train: [0] [140/400] eta: 0:02:02 lr: 0.000021 loss: 3.1404 (3.1589) grad: 0.1441 (0.1540) time: 0.4558 data: 0.0047 max mem: 22448 +train: [0] [160/400] eta: 0:01:52 lr: 0.000024 loss: 3.1404 (3.1562) grad: 0.1558 (0.1555) time: 0.4497 data: 0.0046 max mem: 22448 +train: [0] [180/400] eta: 0:01:42 lr: 0.000027 loss: 3.1338 (3.1541) grad: 0.1580 (0.1554) time: 0.4469 data: 0.0048 max mem: 22448 +train: [0] [200/400] eta: 0:01:33 lr: 0.000030 loss: 3.1425 (3.1535) grad: 0.1451 (0.1543) time: 0.4846 data: 0.0049 max mem: 22448 +train: [0] [220/400] eta: 0:01:24 lr: 0.000033 loss: 3.1520 (3.1531) grad: 0.1469 (0.1541) time: 0.4693 data: 0.0049 max mem: 22448 +train: [0] [240/400] eta: 0:01:14 lr: 0.000036 loss: 3.1446 (3.1514) grad: 0.1526 (0.1538) time: 0.4621 data: 0.0047 max mem: 22448 +train: [0] [260/400] eta: 0:01:05 lr: 0.000039 loss: 3.1198 (3.1488) grad: 0.1501 (0.1533) time: 0.4516 data: 0.0046 max mem: 22448 +train: [0] [280/400] eta: 0:00:55 lr: 0.000042 loss: 3.1074 (3.1455) grad: 0.1501 (0.1532) time: 0.4682 data: 0.0050 max mem: 22448 +train: [0] [300/400] eta: 0:00:46 lr: 0.000045 loss: 3.0910 (3.1407) grad: 0.1509 (0.1533) time: 0.4333 data: 0.0045 max mem: 22448 +train: [0] [320/400] eta: 0:00:37 lr: 0.000048 loss: 3.0723 (3.1374) grad: 0.1597 (0.1543) time: 0.4662 data: 0.0049 max mem: 22448 +train: [0] [340/400] eta: 0:00:27 lr: 0.000051 loss: 3.0786 (3.1338) grad: 0.1618 (0.1547) time: 0.4449 data: 0.0049 max mem: 22448 +train: [0] [360/400] eta: 0:00:18 lr: 0.000054 loss: 3.0584 (3.1294) grad: 0.1625 (0.1555) time: 0.4485 data: 0.0048 max mem: 22448 +train: [0] [380/400] eta: 0:00:09 lr: 0.000057 loss: 3.0551 (3.1254) grad: 0.1767 (0.1565) time: 0.4425 data: 0.0045 max mem: 22448 +train: [0] [399/400] eta: 0:00:00 lr: 0.000060 loss: 3.0590 (3.1225) grad: 0.1780 (0.1574) time: 0.4515 data: 0.0048 max mem: 22448 +train: [0] Total time: 0:03:04 (0.4614 s / it) +train: [0] Summary: lr: 0.000060 loss: 3.0590 (3.1225) grad: 0.1780 (0.1574) +eval (validation): [0] [ 0/85] eta: 0:04:17 time: 3.0293 data: 2.7983 max mem: 22448 +eval (validation): [0] [20/85] eta: 0:00:30 time: 0.3442 data: 0.0042 max mem: 22448 +eval (validation): [0] [40/85] eta: 0:00:18 time: 0.3314 data: 0.0033 max mem: 22448 +eval (validation): [0] [60/85] eta: 0:00:09 time: 0.3319 data: 0.0037 max mem: 22448 +eval (validation): [0] [80/85] eta: 0:00:01 time: 0.3286 data: 0.0044 max mem: 22448 +eval (validation): [0] [84/85] eta: 0:00:00 time: 0.3214 data: 0.0044 max mem: 22448 +eval (validation): [0] Total time: 0:00:31 (0.3679 s / it) +cv: [0] best hparam: (31, 1.0) (045) ('045_lr3.1e+01_wd1.0e+00') loss: 2.561 acc: 0.243 f1: 0.174 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [1] [ 0/400] eta: 0:21:38 lr: nan time: 3.2459 data: 2.8953 max mem: 22448 +train: [1] [ 20/400] eta: 0:03:39 lr: 0.000063 loss: 3.0005 (3.0119) grad: 0.1706 (0.1676) time: 0.4451 data: 0.0029 max mem: 22448 +train: [1] [ 40/400] eta: 0:03:03 lr: 0.000066 loss: 3.0165 (3.0108) grad: 0.1628 (0.1647) time: 0.4403 data: 0.0048 max mem: 22448 +train: [1] [ 60/400] eta: 0:02:45 lr: 0.000069 loss: 2.9905 (2.9973) grad: 0.1609 (0.1661) time: 0.4409 data: 0.0048 max mem: 22448 +train: [1] [ 80/400] eta: 0:02:32 lr: 0.000072 loss: 2.9810 (2.9978) grad: 0.1761 (0.1696) time: 0.4440 data: 0.0049 max mem: 22448 +train: [1] [100/400] eta: 0:02:21 lr: 0.000075 loss: 2.9810 (2.9916) grad: 0.1761 (0.1710) time: 0.4474 data: 0.0048 max mem: 22448 +train: [1] [120/400] eta: 0:02:10 lr: 0.000078 loss: 2.9765 (2.9895) grad: 0.1732 (0.1720) time: 0.4397 data: 0.0048 max mem: 22448 +train: [1] [140/400] eta: 0:02:00 lr: 0.000081 loss: 2.9738 (2.9876) grad: 0.1755 (0.1730) time: 0.4418 data: 0.0047 max mem: 22448 +train: [1] [160/400] eta: 0:01:50 lr: 0.000084 loss: 2.9747 (2.9877) grad: 0.1755 (0.1734) time: 0.4486 data: 0.0048 max mem: 22448 +train: [1] [180/400] eta: 0:01:41 lr: 0.000087 loss: 2.9882 (2.9883) grad: 0.1779 (0.1745) time: 0.4623 data: 0.0048 max mem: 22448 +train: [1] [200/400] eta: 0:01:32 lr: 0.000090 loss: 2.9597 (2.9856) grad: 0.1800 (0.1753) time: 0.4714 data: 0.0048 max mem: 22448 +train: [1] [220/400] eta: 0:01:23 lr: 0.000093 loss: 2.9308 (2.9786) grad: 0.1941 (0.1774) time: 0.4598 data: 0.0048 max mem: 22448 +train: [1] [240/400] eta: 0:01:13 lr: 0.000096 loss: 2.9263 (2.9755) grad: 0.1941 (0.1776) time: 0.4686 data: 0.0049 max mem: 22448 +train: [1] [260/400] eta: 0:01:04 lr: 0.000099 loss: 2.9421 (2.9741) grad: 0.1796 (0.1782) time: 0.4477 data: 0.0046 max mem: 22448 +train: [1] [280/400] eta: 0:00:55 lr: 0.000102 loss: 2.9458 (2.9704) grad: 0.1794 (0.1784) time: 0.4615 data: 0.0047 max mem: 22448 +train: [1] [300/400] eta: 0:00:46 lr: 0.000105 loss: 2.9270 (2.9691) grad: 0.1818 (0.1791) time: 0.4702 data: 0.0049 max mem: 22448 +train: [1] [320/400] eta: 0:00:36 lr: 0.000108 loss: 2.9159 (2.9655) grad: 0.1909 (0.1800) time: 0.4535 data: 0.0049 max mem: 22448 +train: [1] [340/400] eta: 0:00:27 lr: 0.000111 loss: 2.8976 (2.9608) grad: 0.1925 (0.1807) time: 0.4473 data: 0.0049 max mem: 22448 +train: [1] [360/400] eta: 0:00:18 lr: 0.000114 loss: 2.8976 (2.9590) grad: 0.1909 (0.1813) time: 0.4509 data: 0.0049 max mem: 22448 +train: [1] [380/400] eta: 0:00:09 lr: 0.000117 loss: 2.8933 (2.9559) grad: 0.1909 (0.1822) time: 0.4502 data: 0.0050 max mem: 22448 +train: [1] [399/400] eta: 0:00:00 lr: 0.000120 loss: 2.8933 (2.9538) grad: 0.1983 (0.1836) time: 0.4511 data: 0.0049 max mem: 22448 +train: [1] Total time: 0:03:03 (0.4597 s / it) +train: [1] Summary: lr: 0.000120 loss: 2.8933 (2.9538) grad: 0.1983 (0.1836) +eval (validation): [1] [ 0/85] eta: 0:04:20 time: 3.0656 data: 2.7855 max mem: 22448 +eval (validation): [1] [20/85] eta: 0:00:32 time: 0.3687 data: 0.0039 max mem: 22448 +eval (validation): [1] [40/85] eta: 0:00:19 time: 0.3453 data: 0.0040 max mem: 22448 +eval (validation): [1] [60/85] eta: 0:00:09 time: 0.3309 data: 0.0041 max mem: 22448 +eval (validation): [1] [80/85] eta: 0:00:01 time: 0.3288 data: 0.0041 max mem: 22448 +eval (validation): [1] [84/85] eta: 0:00:00 time: 0.3201 data: 0.0039 max mem: 22448 +eval (validation): [1] Total time: 0:00:32 (0.3768 s / it) +cv: [1] best hparam: (16, 1.0) (041) ('041_lr1.6e+01_wd1.0e+00') loss: 2.429 acc: 0.268 f1: 0.206 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [2] [ 0/400] eta: 0:21:00 lr: nan time: 3.1505 data: 2.7591 max mem: 22448 +train: [2] [ 20/400] eta: 0:03:43 lr: 0.000123 loss: 2.9000 (2.8823) grad: 0.2242 (0.2275) time: 0.4613 data: 0.0038 max mem: 22448 +train: [2] [ 40/400] eta: 0:03:06 lr: 0.000126 loss: 2.8972 (2.8876) grad: 0.2181 (0.2200) time: 0.4454 data: 0.0047 max mem: 22448 +train: [2] [ 60/400] eta: 0:02:48 lr: 0.000129 loss: 2.8972 (2.8905) grad: 0.2158 (0.2202) time: 0.4460 data: 0.0048 max mem: 22448 +train: [2] [ 80/400] eta: 0:02:35 lr: 0.000132 loss: 2.9045 (2.8997) grad: 0.2357 (0.2434) time: 0.4585 data: 0.0048 max mem: 22448 +train: [2] [100/400] eta: 0:02:23 lr: 0.000135 loss: 2.9847 (2.9507) grad: 0.4538 (0.3515) time: 0.4463 data: 0.0047 max mem: 22448 +train: [2] [120/400] eta: 0:02:12 lr: 0.000138 loss: 3.4132 (3.0801) grad: 1.2122 (0.5670) time: 0.4495 data: 0.0047 max mem: 22448 +WARNING: classifier 48 (50, 1.0) diverged (loss=64.63 > 63.56) at step 462. Freezing. +train: [2] [140/400] eta: 0:02:02 lr: 0.000141 loss: 3.4132 (3.0802) grad: 1.0393 (0.5540) time: 0.4585 data: 0.0049 max mem: 22448 +train: [2] [160/400] eta: 0:01:52 lr: 0.000144 loss: 2.8575 (3.0506) grad: 0.2087 (0.5113) time: 0.4557 data: 0.0046 max mem: 22448 +train: [2] [180/400] eta: 0:01:43 lr: 0.000147 loss: 2.8189 (3.0252) grad: 0.2056 (0.4777) time: 0.4587 data: 0.0048 max mem: 22448 +train: [2] [200/400] eta: 0:01:34 lr: 0.000150 loss: 2.8194 (3.0057) grad: 0.2056 (0.4508) time: 0.4893 data: 0.0052 max mem: 22448 +train: [2] [220/400] eta: 0:01:24 lr: 0.000153 loss: 2.8454 (2.9932) grad: 0.2136 (0.4306) time: 0.4732 data: 0.0049 max mem: 22448 +train: [2] [240/400] eta: 0:01:15 lr: 0.000156 loss: 2.8636 (2.9830) grad: 0.2268 (0.4139) time: 0.4550 data: 0.0047 max mem: 22448 +train: [2] [260/400] eta: 0:01:05 lr: 0.000159 loss: 2.8598 (2.9717) grad: 0.2257 (0.3991) time: 0.4496 data: 0.0048 max mem: 22448 +train: [2] [280/400] eta: 0:00:56 lr: 0.000162 loss: 2.8471 (2.9639) grad: 0.2489 (0.3962) time: 0.4733 data: 0.0049 max mem: 22448 +train: [2] [300/400] eta: 0:00:46 lr: 0.000165 loss: 2.9680 (2.9825) grad: 0.6181 (0.4338) time: 0.4519 data: 0.0047 max mem: 22448 +WARNING: classifier 47 (43, 1.0) diverged (loss=65.65 > 63.56) at step 554. Freezing. +train: [2] [320/400] eta: 0:00:37 lr: 0.000168 loss: 3.1489 (2.9940) grad: 0.7985 (0.4534) time: 0.4514 data: 0.0048 max mem: 22448 +train: [2] [340/400] eta: 0:00:27 lr: 0.000171 loss: 2.8509 (2.9851) grad: 0.2091 (0.4389) time: 0.4468 data: 0.0047 max mem: 22448 +train: [2] [360/400] eta: 0:00:18 lr: 0.000174 loss: 2.8220 (2.9775) grad: 0.2081 (0.4263) time: 0.4496 data: 0.0048 max mem: 22448 +train: [2] [380/400] eta: 0:00:09 lr: 0.000177 loss: 2.8627 (2.9708) grad: 0.2184 (0.4157) time: 0.4472 data: 0.0046 max mem: 22448 +train: [2] [399/400] eta: 0:00:00 lr: 0.000180 loss: 2.8519 (2.9628) grad: 0.2189 (0.4056) time: 0.4458 data: 0.0048 max mem: 22448 +train: [2] Total time: 0:03:05 (0.4630 s / it) +train: [2] Summary: lr: 0.000180 loss: 2.8519 (2.9628) grad: 0.2189 (0.4056) +eval (validation): [2] [ 0/85] eta: 0:04:21 time: 3.0808 data: 2.7876 max mem: 22448 +eval (validation): [2] [20/85] eta: 0:00:30 time: 0.3336 data: 0.0041 max mem: 22448 +eval (validation): [2] [40/85] eta: 0:00:18 time: 0.3345 data: 0.0036 max mem: 22448 +eval (validation): [2] [60/85] eta: 0:00:09 time: 0.3396 data: 0.0040 max mem: 22448 +eval (validation): [2] [80/85] eta: 0:00:01 time: 0.3265 data: 0.0039 max mem: 22448 +eval (validation): [2] [84/85] eta: 0:00:00 time: 0.3196 data: 0.0038 max mem: 22448 +eval (validation): [2] Total time: 0:00:31 (0.3680 s / it) +cv: [2] best hparam: (3.1, 1.0) (031) ('031_lr3.1e+00_wd1.0e+00') loss: 2.416 acc: 0.270 f1: 0.195 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [3] [ 0/400] eta: 0:21:11 lr: nan time: 3.1775 data: 2.7898 max mem: 22448 +train: [3] [ 20/400] eta: 0:03:34 lr: 0.000183 loss: 2.7585 (2.7726) grad: 0.2048 (0.2092) time: 0.4327 data: 0.0038 max mem: 22448 +train: [3] [ 40/400] eta: 0:02:59 lr: 0.000186 loss: 2.8060 (2.8107) grad: 0.2225 (0.2252) time: 0.4331 data: 0.0047 max mem: 22448 +train: [3] [ 60/400] eta: 0:02:42 lr: 0.000189 loss: 2.8100 (2.8071) grad: 0.2457 (0.2339) time: 0.4343 data: 0.0046 max mem: 22448 +train: [3] [ 80/400] eta: 0:02:30 lr: 0.000192 loss: 2.8341 (2.8345) grad: 0.2905 (0.2877) time: 0.4418 data: 0.0049 max mem: 22448 +train: [3] [100/400] eta: 0:02:19 lr: 0.000195 loss: 3.0274 (2.9377) grad: 0.5929 (0.4638) time: 0.4454 data: 0.0046 max mem: 22448 +WARNING: classifier 46 (36, 1.0) diverged (loss=70.20 > 63.56) at step 652. Freezing. +train: [3] [120/400] eta: 0:02:09 lr: 0.000198 loss: 3.0274 (2.9510) grad: 0.7296 (0.4738) time: 0.4414 data: 0.0046 max mem: 22448 +train: [3] [140/400] eta: 0:01:59 lr: 0.000201 loss: 2.8058 (2.9288) grad: 0.2352 (0.4398) time: 0.4546 data: 0.0048 max mem: 22448 +train: [3] [160/400] eta: 0:01:50 lr: 0.000204 loss: 2.8078 (2.9178) grad: 0.2322 (0.4131) time: 0.4489 data: 0.0050 max mem: 22448 +train: [3] [180/400] eta: 0:01:40 lr: 0.000207 loss: 2.8078 (2.9022) grad: 0.2267 (0.3937) time: 0.4481 data: 0.0047 max mem: 22448 +train: [3] [200/400] eta: 0:01:31 lr: 0.000210 loss: 2.7972 (2.8939) grad: 0.2389 (0.3789) time: 0.4616 data: 0.0051 max mem: 22448 +train: [3] [220/400] eta: 0:01:22 lr: 0.000213 loss: 2.8147 (2.8870) grad: 0.2599 (0.3702) time: 0.4587 data: 0.0049 max mem: 22448 +train: [3] [240/400] eta: 0:01:13 lr: 0.000216 loss: 2.8469 (2.9002) grad: 0.3407 (0.4003) time: 0.4388 data: 0.0047 max mem: 22448 +WARNING: classifier 45 (31, 1.0) diverged (loss=71.79 > 63.56) at step 726. Freezing. +train: [3] [260/400] eta: 0:01:03 lr: 0.000219 loss: 3.0548 (2.9325) grad: 0.6463 (0.4386) time: 0.4550 data: 0.0047 max mem: 22448 +train: [3] [280/400] eta: 0:00:54 lr: 0.000222 loss: 2.8281 (2.9227) grad: 0.2354 (0.4235) time: 0.4474 data: 0.0050 max mem: 22448 +train: [3] [300/400] eta: 0:00:45 lr: 0.000225 loss: 2.7949 (2.9142) grad: 0.2195 (0.4101) time: 0.4358 data: 0.0047 max mem: 22448 +train: [3] [320/400] eta: 0:00:36 lr: 0.000228 loss: 2.7671 (2.9055) grad: 0.2312 (0.4001) time: 0.4340 data: 0.0048 max mem: 22448 +train: [3] [340/400] eta: 0:00:27 lr: 0.000231 loss: 2.7889 (2.9006) grad: 0.2608 (0.3930) time: 0.4402 data: 0.0048 max mem: 22448 +train: [3] [360/400] eta: 0:00:18 lr: 0.000234 loss: 2.8581 (2.9016) grad: 0.2850 (0.3942) time: 0.4390 data: 0.0048 max mem: 22448 +train: [3] [380/400] eta: 0:00:09 lr: 0.000237 loss: 2.9692 (2.9210) grad: 0.6051 (0.4333) time: 0.4354 data: 0.0048 max mem: 22448 +WARNING: classifier 44 (26, 1.0) diverged (loss=67.73 > 63.56) at step 793. Freezing. +train: [3] [399/400] eta: 0:00:00 lr: 0.000240 loss: 3.0523 (2.9315) grad: 0.8407 (0.4540) time: 0.4361 data: 0.0047 max mem: 22448 +train: [3] Total time: 0:03:00 (0.4506 s / it) +train: [3] Summary: lr: 0.000240 loss: 3.0523 (2.9315) grad: 0.8407 (0.4540) +eval (validation): [3] [ 0/85] eta: 0:04:25 time: 3.1239 data: 2.8475 max mem: 22448 +eval (validation): [3] [20/85] eta: 0:00:30 time: 0.3336 data: 0.0040 max mem: 22448 +eval (validation): [3] [40/85] eta: 0:00:17 time: 0.3300 data: 0.0037 max mem: 22448 +eval (validation): [3] [60/85] eta: 0:00:09 time: 0.3448 data: 0.0042 max mem: 22448 +eval (validation): [3] [80/85] eta: 0:00:01 time: 0.3262 data: 0.0041 max mem: 22448 +eval (validation): [3] [84/85] eta: 0:00:00 time: 0.3209 data: 0.0040 max mem: 22448 +eval (validation): [3] Total time: 0:00:31 (0.3683 s / it) +cv: [3] best hparam: (3.7, 1.0) (032) ('032_lr3.7e+00_wd1.0e+00') loss: 2.403 acc: 0.272 f1: 0.198 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [4] [ 0/400] eta: 0:20:50 lr: nan time: 3.1256 data: 2.7454 max mem: 22448 +train: [4] [ 20/400] eta: 0:03:31 lr: 0.000243 loss: 2.7124 (2.7195) grad: 0.2138 (0.2203) time: 0.4278 data: 0.0047 max mem: 22448 +train: [4] [ 40/400] eta: 0:02:58 lr: 0.000246 loss: 2.7218 (2.7392) grad: 0.2138 (0.2200) time: 0.4321 data: 0.0042 max mem: 22448 +train: [4] [ 60/400] eta: 0:02:41 lr: 0.000249 loss: 2.7218 (2.7429) grad: 0.2115 (0.2190) time: 0.4293 data: 0.0047 max mem: 22448 +train: [4] [ 80/400] eta: 0:02:28 lr: 0.000252 loss: 2.7213 (2.7430) grad: 0.2098 (0.2173) time: 0.4361 data: 0.0046 max mem: 22448 +train: [4] [100/400] eta: 0:02:17 lr: 0.000255 loss: 2.7668 (2.7520) grad: 0.2170 (0.2175) time: 0.4345 data: 0.0048 max mem: 22448 +train: [4] [120/400] eta: 0:02:08 lr: 0.000258 loss: 2.7625 (2.7474) grad: 0.2195 (0.2179) time: 0.4577 data: 0.0049 max mem: 22448 +train: [4] [140/400] eta: 0:01:58 lr: 0.000261 loss: 2.7178 (2.7455) grad: 0.2257 (0.2203) time: 0.4390 data: 0.0047 max mem: 22448 +train: [4] [160/400] eta: 0:01:48 lr: 0.000264 loss: 2.7432 (2.7451) grad: 0.2365 (0.2227) time: 0.4404 data: 0.0048 max mem: 22448 +train: [4] [180/400] eta: 0:01:39 lr: 0.000267 loss: 2.7288 (2.7474) grad: 0.2390 (0.2250) time: 0.4466 data: 0.0048 max mem: 22448 +train: [4] [200/400] eta: 0:01:30 lr: 0.000270 loss: 2.7170 (2.7437) grad: 0.2390 (0.2262) time: 0.4648 data: 0.0050 max mem: 22448 +train: [4] [220/400] eta: 0:01:21 lr: 0.000273 loss: 2.7200 (2.7459) grad: 0.2390 (0.2279) time: 0.4522 data: 0.0050 max mem: 22448 +train: [4] [240/400] eta: 0:01:12 lr: 0.000276 loss: 2.7569 (2.7459) grad: 0.2364 (0.2287) time: 0.4342 data: 0.0047 max mem: 22448 +train: [4] [260/400] eta: 0:01:03 lr: 0.000279 loss: 2.7717 (2.7470) grad: 0.2305 (0.2294) time: 0.4705 data: 0.0049 max mem: 22448 +train: [4] [280/400] eta: 0:00:54 lr: 0.000282 loss: 2.7717 (2.7482) grad: 0.2305 (0.2306) time: 0.4458 data: 0.0048 max mem: 22448 +train: [4] [300/400] eta: 0:00:45 lr: 0.000285 loss: 2.7516 (2.7485) grad: 0.2358 (0.2307) time: 0.4330 data: 0.0050 max mem: 22448 +train: [4] [320/400] eta: 0:00:36 lr: 0.000288 loss: 2.7423 (2.7476) grad: 0.2232 (0.2299) time: 0.4323 data: 0.0046 max mem: 22448 +train: [4] [340/400] eta: 0:00:27 lr: 0.000291 loss: 2.7363 (2.7476) grad: 0.2148 (0.2292) time: 0.4484 data: 0.0049 max mem: 22448 +train: [4] [360/400] eta: 0:00:17 lr: 0.000294 loss: 2.7325 (2.7484) grad: 0.2217 (0.2290) time: 0.4398 data: 0.0048 max mem: 22448 +train: [4] [380/400] eta: 0:00:08 lr: 0.000297 loss: 2.7325 (2.7470) grad: 0.2251 (0.2290) time: 0.4348 data: 0.0049 max mem: 22448 +train: [4] [399/400] eta: 0:00:00 lr: 0.000300 loss: 2.7229 (2.7452) grad: 0.2247 (0.2289) time: 0.4355 data: 0.0049 max mem: 22448 +train: [4] Total time: 0:02:59 (0.4491 s / it) +train: [4] Summary: lr: 0.000300 loss: 2.7229 (2.7452) grad: 0.2247 (0.2289) +eval (validation): [4] [ 0/85] eta: 0:04:19 time: 3.0584 data: 2.8267 max mem: 22448 +eval (validation): [4] [20/85] eta: 0:00:30 time: 0.3407 data: 0.0051 max mem: 22448 +eval (validation): [4] [40/85] eta: 0:00:18 time: 0.3279 data: 0.0033 max mem: 22448 +eval (validation): [4] [60/85] eta: 0:00:09 time: 0.3269 data: 0.0038 max mem: 22448 +eval (validation): [4] [80/85] eta: 0:00:01 time: 0.3203 data: 0.0037 max mem: 22448 +eval (validation): [4] [84/85] eta: 0:00:00 time: 0.3124 data: 0.0036 max mem: 22448 +eval (validation): [4] Total time: 0:00:30 (0.3627 s / it) +cv: [4] best hparam: (1.2, 1.0) (025) ('025_lr1.2e+00_wd1.0e+00') loss: 2.403 acc: 0.280 f1: 0.213 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [5] [ 0/400] eta: 0:21:10 lr: nan time: 3.1759 data: 2.8026 max mem: 22448 +train: [5] [ 20/400] eta: 0:03:31 lr: 0.000300 loss: 2.6190 (2.6055) grad: 0.2255 (0.2283) time: 0.4264 data: 0.0041 max mem: 22448 +train: [5] [ 40/400] eta: 0:02:59 lr: 0.000300 loss: 2.6358 (2.6525) grad: 0.2286 (0.2327) time: 0.4392 data: 0.0046 max mem: 22448 +train: [5] [ 60/400] eta: 0:02:43 lr: 0.000300 loss: 2.6989 (2.6658) grad: 0.2333 (0.2359) time: 0.4379 data: 0.0047 max mem: 22448 +train: [5] [ 80/400] eta: 0:02:29 lr: 0.000300 loss: 2.6992 (2.6679) grad: 0.2416 (0.2363) time: 0.4319 data: 0.0049 max mem: 22448 +train: [5] [100/400] eta: 0:02:19 lr: 0.000300 loss: 2.6881 (2.6760) grad: 0.2418 (0.2398) time: 0.4457 data: 0.0045 max mem: 22448 +train: [5] [120/400] eta: 0:02:09 lr: 0.000300 loss: 2.6663 (2.6697) grad: 0.2425 (0.2397) time: 0.4494 data: 0.0049 max mem: 22448 +train: [5] [140/400] eta: 0:01:59 lr: 0.000300 loss: 2.6265 (2.6650) grad: 0.2317 (0.2381) time: 0.4384 data: 0.0050 max mem: 22448 +train: [5] [160/400] eta: 0:01:49 lr: 0.000299 loss: 2.6411 (2.6646) grad: 0.2309 (0.2381) time: 0.4380 data: 0.0047 max mem: 22448 +train: [5] [180/400] eta: 0:01:40 lr: 0.000299 loss: 2.6590 (2.6667) grad: 0.2421 (0.2385) time: 0.4788 data: 0.0052 max mem: 22448 +train: [5] [200/400] eta: 0:01:31 lr: 0.000299 loss: 2.6748 (2.6660) grad: 0.2421 (0.2386) time: 0.4519 data: 0.0049 max mem: 22448 +train: [5] [220/400] eta: 0:01:22 lr: 0.000299 loss: 2.6286 (2.6643) grad: 0.2390 (0.2378) time: 0.4466 data: 0.0049 max mem: 22448 +train: [5] [240/400] eta: 0:01:12 lr: 0.000299 loss: 2.6402 (2.6643) grad: 0.2395 (0.2381) time: 0.4341 data: 0.0047 max mem: 22448 +train: [5] [260/400] eta: 0:01:03 lr: 0.000299 loss: 2.6371 (2.6600) grad: 0.2346 (0.2377) time: 0.4553 data: 0.0049 max mem: 22448 +train: [5] [280/400] eta: 0:00:54 lr: 0.000298 loss: 2.6371 (2.6623) grad: 0.2355 (0.2381) time: 0.4350 data: 0.0049 max mem: 22448 +train: [5] [300/400] eta: 0:00:45 lr: 0.000298 loss: 2.6231 (2.6590) grad: 0.2380 (0.2380) time: 0.4350 data: 0.0049 max mem: 22448 +train: [5] [320/400] eta: 0:00:36 lr: 0.000298 loss: 2.6231 (2.6594) grad: 0.2435 (0.2388) time: 0.4279 data: 0.0050 max mem: 22448 +train: [5] [340/400] eta: 0:00:26 lr: 0.000298 loss: 2.6560 (2.6591) grad: 0.2464 (0.2388) time: 0.4398 data: 0.0046 max mem: 22448 +train: [5] [360/400] eta: 0:00:17 lr: 0.000297 loss: 2.6307 (2.6590) grad: 0.2397 (0.2391) time: 0.4390 data: 0.0047 max mem: 22448 +train: [5] [380/400] eta: 0:00:08 lr: 0.000297 loss: 2.6305 (2.6595) grad: 0.2465 (0.2397) time: 0.4325 data: 0.0048 max mem: 22448 +train: [5] [399/400] eta: 0:00:00 lr: 0.000297 loss: 2.6162 (2.6564) grad: 0.2422 (0.2392) time: 0.4341 data: 0.0049 max mem: 22448 +train: [5] Total time: 0:02:59 (0.4483 s / it) +train: [5] Summary: lr: 0.000297 loss: 2.6162 (2.6564) grad: 0.2422 (0.2392) +eval (validation): [5] [ 0/85] eta: 0:04:20 time: 3.0650 data: 2.8347 max mem: 22448 +eval (validation): [5] [20/85] eta: 0:00:31 time: 0.3503 data: 0.0041 max mem: 22448 +eval (validation): [5] [40/85] eta: 0:00:18 time: 0.3313 data: 0.0041 max mem: 22448 +eval (validation): [5] [60/85] eta: 0:00:09 time: 0.3322 data: 0.0040 max mem: 22448 +eval (validation): [5] [80/85] eta: 0:00:01 time: 0.3268 data: 0.0037 max mem: 22448 +eval (validation): [5] [84/85] eta: 0:00:00 time: 0.3161 data: 0.0036 max mem: 22448 +eval (validation): [5] Total time: 0:00:31 (0.3690 s / it) +cv: [5] best hparam: (1.4, 1.0) (026) ('026_lr1.4e+00_wd1.0e+00') loss: 2.376 acc: 0.286 f1: 0.218 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [6] [ 0/400] eta: 0:21:01 lr: nan time: 3.1537 data: 2.7775 max mem: 22448 +train: [6] [ 20/400] eta: 0:03:36 lr: 0.000296 loss: 2.5868 (2.5671) grad: 0.2395 (0.2339) time: 0.4398 data: 0.0049 max mem: 22448 +train: [6] [ 40/400] eta: 0:03:01 lr: 0.000296 loss: 2.5944 (2.5959) grad: 0.2408 (0.2412) time: 0.4377 data: 0.0050 max mem: 22448 +train: [6] [ 60/400] eta: 0:02:43 lr: 0.000296 loss: 2.5695 (2.5892) grad: 0.2392 (0.2412) time: 0.4314 data: 0.0048 max mem: 22448 +train: [6] [ 80/400] eta: 0:02:30 lr: 0.000295 loss: 2.5377 (2.5771) grad: 0.2381 (0.2416) time: 0.4322 data: 0.0048 max mem: 22448 +train: [6] [100/400] eta: 0:02:19 lr: 0.000295 loss: 2.5396 (2.5761) grad: 0.2382 (0.2408) time: 0.4558 data: 0.0048 max mem: 22448 +train: [6] [120/400] eta: 0:02:09 lr: 0.000295 loss: 2.5560 (2.5806) grad: 0.2382 (0.2418) time: 0.4406 data: 0.0048 max mem: 22448 +train: [6] [140/400] eta: 0:01:59 lr: 0.000294 loss: 2.6184 (2.5880) grad: 0.2406 (0.2418) time: 0.4382 data: 0.0048 max mem: 22448 +train: [6] [160/400] eta: 0:01:49 lr: 0.000294 loss: 2.6156 (2.5921) grad: 0.2427 (0.2417) time: 0.4427 data: 0.0048 max mem: 22448 +train: [6] [180/400] eta: 0:01:41 lr: 0.000293 loss: 2.5923 (2.5916) grad: 0.2469 (0.2427) time: 0.4966 data: 0.0053 max mem: 22448 +train: [6] [200/400] eta: 0:01:32 lr: 0.000293 loss: 2.6113 (2.5936) grad: 0.2504 (0.2436) time: 0.4602 data: 0.0047 max mem: 22448 +train: [6] [220/400] eta: 0:01:22 lr: 0.000292 loss: 2.6113 (2.5921) grad: 0.2483 (0.2440) time: 0.4335 data: 0.0047 max mem: 22448 +train: [6] [240/400] eta: 0:01:13 lr: 0.000292 loss: 2.6075 (2.5950) grad: 0.2458 (0.2441) time: 0.4590 data: 0.0049 max mem: 22448 +train: [6] [260/400] eta: 0:01:04 lr: 0.000291 loss: 2.6018 (2.5907) grad: 0.2414 (0.2437) time: 0.4490 data: 0.0046 max mem: 22448 +train: [6] [280/400] eta: 0:00:54 lr: 0.000291 loss: 2.5857 (2.5908) grad: 0.2358 (0.2436) time: 0.4381 data: 0.0050 max mem: 22448 +train: [6] [300/400] eta: 0:00:45 lr: 0.000290 loss: 2.6101 (2.5932) grad: 0.2438 (0.2440) time: 0.4493 data: 0.0050 max mem: 22448 +train: [6] [320/400] eta: 0:00:36 lr: 0.000290 loss: 2.6213 (2.5947) grad: 0.2475 (0.2448) time: 0.4455 data: 0.0050 max mem: 22448 +train: [6] [340/400] eta: 0:00:27 lr: 0.000289 loss: 2.6377 (2.5961) grad: 0.2515 (0.2452) time: 0.4380 data: 0.0047 max mem: 22448 +train: [6] [360/400] eta: 0:00:18 lr: 0.000288 loss: 2.5989 (2.5935) grad: 0.2491 (0.2453) time: 0.4423 data: 0.0046 max mem: 22448 +train: [6] [380/400] eta: 0:00:09 lr: 0.000288 loss: 2.5423 (2.5949) grad: 0.2452 (0.2453) time: 0.4486 data: 0.0046 max mem: 22448 +train: [6] [399/400] eta: 0:00:00 lr: 0.000287 loss: 2.5935 (2.5952) grad: 0.2360 (0.2446) time: 0.4362 data: 0.0047 max mem: 22448 +train: [6] Total time: 0:03:01 (0.4531 s / it) +train: [6] Summary: lr: 0.000287 loss: 2.5935 (2.5952) grad: 0.2360 (0.2446) +eval (validation): [6] [ 0/85] eta: 0:04:30 time: 3.1787 data: 2.8760 max mem: 22448 +eval (validation): [6] [20/85] eta: 0:00:33 time: 0.3825 data: 0.0046 max mem: 22448 +eval (validation): [6] [40/85] eta: 0:00:19 time: 0.3569 data: 0.0042 max mem: 22448 +eval (validation): [6] [60/85] eta: 0:00:10 time: 0.3376 data: 0.0041 max mem: 22448 +eval (validation): [6] [80/85] eta: 0:00:01 time: 0.3281 data: 0.0039 max mem: 22448 +eval (validation): [6] [84/85] eta: 0:00:00 time: 0.3195 data: 0.0037 max mem: 22448 +eval (validation): [6] Total time: 0:00:32 (0.3857 s / it) +cv: [6] best hparam: (0.72, 1.0) (022) ('022_lr7.2e-01_wd1.0e+00') loss: 2.383 acc: 0.283 f1: 0.223 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [7] [ 0/400] eta: 0:21:46 lr: nan time: 3.2665 data: 2.8816 max mem: 22448 +train: [7] [ 20/400] eta: 0:03:43 lr: 0.000286 loss: 2.4779 (2.5003) grad: 0.2280 (0.2361) time: 0.4532 data: 0.0032 max mem: 22448 +train: [7] [ 40/400] eta: 0:03:05 lr: 0.000286 loss: 2.5054 (2.5014) grad: 0.2327 (0.2417) time: 0.4394 data: 0.0050 max mem: 22448 +train: [7] [ 60/400] eta: 0:02:46 lr: 0.000285 loss: 2.5054 (2.4914) grad: 0.2506 (0.2462) time: 0.4343 data: 0.0047 max mem: 22448 +train: [7] [ 80/400] eta: 0:02:32 lr: 0.000284 loss: 2.4917 (2.5077) grad: 0.2486 (0.2450) time: 0.4437 data: 0.0048 max mem: 22448 +train: [7] [100/400] eta: 0:02:21 lr: 0.000284 loss: 2.4862 (2.4997) grad: 0.2444 (0.2445) time: 0.4525 data: 0.0051 max mem: 22448 +train: [7] [120/400] eta: 0:02:11 lr: 0.000283 loss: 2.4862 (2.5013) grad: 0.2476 (0.2460) time: 0.4461 data: 0.0049 max mem: 22448 +train: [7] [140/400] eta: 0:02:00 lr: 0.000282 loss: 2.5319 (2.5068) grad: 0.2470 (0.2461) time: 0.4396 data: 0.0048 max mem: 22448 +train: [7] [160/400] eta: 0:01:51 lr: 0.000282 loss: 2.5445 (2.5100) grad: 0.2439 (0.2461) time: 0.4784 data: 0.0048 max mem: 22448 +train: [7] [180/400] eta: 0:01:42 lr: 0.000281 loss: 2.5519 (2.5162) grad: 0.2489 (0.2470) time: 0.4704 data: 0.0051 max mem: 22448 +train: [7] [200/400] eta: 0:01:32 lr: 0.000280 loss: 2.5333 (2.5153) grad: 0.2484 (0.2473) time: 0.4492 data: 0.0048 max mem: 22448 +train: [7] [220/400] eta: 0:01:22 lr: 0.000279 loss: 2.5083 (2.5135) grad: 0.2482 (0.2479) time: 0.4183 data: 0.0043 max mem: 22448 +train: [7] [240/400] eta: 0:01:13 lr: 0.000278 loss: 2.5359 (2.5155) grad: 0.2485 (0.2485) time: 0.4800 data: 0.0051 max mem: 22448 +train: [7] [260/400] eta: 0:01:04 lr: 0.000278 loss: 2.5193 (2.5144) grad: 0.2459 (0.2479) time: 0.4423 data: 0.0052 max mem: 22448 +train: [7] [280/400] eta: 0:00:55 lr: 0.000277 loss: 2.4837 (2.5130) grad: 0.2431 (0.2479) time: 0.4393 data: 0.0048 max mem: 22448 +train: [7] [300/400] eta: 0:00:45 lr: 0.000276 loss: 2.4653 (2.5111) grad: 0.2441 (0.2480) time: 0.4231 data: 0.0046 max mem: 22448 +train: [7] [320/400] eta: 0:00:36 lr: 0.000275 loss: 2.4655 (2.5109) grad: 0.2418 (0.2478) time: 0.4465 data: 0.0049 max mem: 22448 +train: [7] [340/400] eta: 0:00:27 lr: 0.000274 loss: 2.4803 (2.5086) grad: 0.2361 (0.2474) time: 0.4361 data: 0.0048 max mem: 22448 +train: [7] [360/400] eta: 0:00:18 lr: 0.000273 loss: 2.4875 (2.5102) grad: 0.2404 (0.2476) time: 0.4430 data: 0.0050 max mem: 22448 +train: [7] [380/400] eta: 0:00:09 lr: 0.000272 loss: 2.5345 (2.5110) grad: 0.2473 (0.2482) time: 0.4425 data: 0.0051 max mem: 22448 +train: [7] [399/400] eta: 0:00:00 lr: 0.000271 loss: 2.5637 (2.5128) grad: 0.2518 (0.2488) time: 0.4375 data: 0.0049 max mem: 22448 +train: [7] Total time: 0:03:01 (0.4534 s / it) +train: [7] Summary: lr: 0.000271 loss: 2.5637 (2.5128) grad: 0.2518 (0.2488) +eval (validation): [7] [ 0/85] eta: 0:04:36 time: 3.2516 data: 2.9660 max mem: 22448 +eval (validation): [7] [20/85] eta: 0:00:31 time: 0.3455 data: 0.0225 max mem: 22448 +eval (validation): [7] [40/85] eta: 0:00:18 time: 0.3437 data: 0.0036 max mem: 22448 +eval (validation): [7] [60/85] eta: 0:00:09 time: 0.3362 data: 0.0041 max mem: 22448 +eval (validation): [7] [80/85] eta: 0:00:01 time: 0.3358 data: 0.0040 max mem: 22448 +eval (validation): [7] [84/85] eta: 0:00:00 time: 0.3249 data: 0.0040 max mem: 22448 +eval (validation): [7] Total time: 0:00:32 (0.3765 s / it) +cv: [7] best hparam: (0.61, 1.0) (021) ('021_lr6.1e-01_wd1.0e+00') loss: 2.376 acc: 0.280 f1: 0.222 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [8] [ 0/400] eta: 0:21:40 lr: nan time: 3.2513 data: 2.9084 max mem: 22448 +train: [8] [ 20/400] eta: 0:03:37 lr: 0.000270 loss: 2.3525 (2.4004) grad: 0.2363 (0.2405) time: 0.4395 data: 0.0044 max mem: 22448 +train: [8] [ 40/400] eta: 0:03:05 lr: 0.000270 loss: 2.3962 (2.4132) grad: 0.2469 (0.2441) time: 0.4515 data: 0.0042 max mem: 22448 +train: [8] [ 60/400] eta: 0:02:46 lr: 0.000269 loss: 2.4286 (2.4273) grad: 0.2469 (0.2450) time: 0.4414 data: 0.0048 max mem: 22448 +train: [8] [ 80/400] eta: 0:02:33 lr: 0.000268 loss: 2.4705 (2.4347) grad: 0.2501 (0.2472) time: 0.4425 data: 0.0047 max mem: 22448 +train: [8] [100/400] eta: 0:02:21 lr: 0.000267 loss: 2.4624 (2.4326) grad: 0.2554 (0.2507) time: 0.4461 data: 0.0046 max mem: 22448 +train: [8] [120/400] eta: 0:02:10 lr: 0.000266 loss: 2.4478 (2.4376) grad: 0.2757 (0.2561) time: 0.4355 data: 0.0049 max mem: 22448 +train: [8] [140/400] eta: 0:02:00 lr: 0.000265 loss: 2.4511 (2.4426) grad: 0.2792 (0.2584) time: 0.4447 data: 0.0049 max mem: 22448 +train: [8] [160/400] eta: 0:01:51 lr: 0.000264 loss: 2.4587 (2.4440) grad: 0.2771 (0.2611) time: 0.4777 data: 0.0050 max mem: 22448 +train: [8] [180/400] eta: 0:01:42 lr: 0.000263 loss: 2.4505 (2.4403) grad: 0.2625 (0.2610) time: 0.4592 data: 0.0048 max mem: 22448 +train: [8] [200/400] eta: 0:01:32 lr: 0.000262 loss: 2.4349 (2.4442) grad: 0.2571 (0.2611) time: 0.4437 data: 0.0045 max mem: 22448 +train: [8] [220/400] eta: 0:01:22 lr: 0.000260 loss: 2.4653 (2.4468) grad: 0.2550 (0.2605) time: 0.4266 data: 0.0044 max mem: 22448 +train: [8] [240/400] eta: 0:01:13 lr: 0.000259 loss: 2.4524 (2.4458) grad: 0.2516 (0.2600) time: 0.4907 data: 0.0051 max mem: 22448 +train: [8] [260/400] eta: 0:01:04 lr: 0.000258 loss: 2.4509 (2.4473) grad: 0.2516 (0.2598) time: 0.4531 data: 0.0049 max mem: 22448 +train: [8] [280/400] eta: 0:00:55 lr: 0.000257 loss: 2.4358 (2.4461) grad: 0.2505 (0.2598) time: 0.4393 data: 0.0049 max mem: 22448 +train: [8] [300/400] eta: 0:00:45 lr: 0.000256 loss: 2.4213 (2.4462) grad: 0.2499 (0.2596) time: 0.4380 data: 0.0048 max mem: 22448 +train: [8] [320/400] eta: 0:00:36 lr: 0.000255 loss: 2.4213 (2.4456) grad: 0.2468 (0.2588) time: 0.4529 data: 0.0047 max mem: 22448 +train: [8] [340/400] eta: 0:00:27 lr: 0.000254 loss: 2.4404 (2.4456) grad: 0.2511 (0.2592) time: 0.4464 data: 0.0050 max mem: 22448 +train: [8] [360/400] eta: 0:00:18 lr: 0.000253 loss: 2.4427 (2.4450) grad: 0.2558 (0.2587) time: 0.4395 data: 0.0049 max mem: 22448 +train: [8] [380/400] eta: 0:00:09 lr: 0.000252 loss: 2.4678 (2.4464) grad: 0.2526 (0.2585) time: 0.4584 data: 0.0048 max mem: 22448 +train: [8] [399/400] eta: 0:00:00 lr: 0.000250 loss: 2.4682 (2.4467) grad: 0.2559 (0.2589) time: 0.4445 data: 0.0049 max mem: 22448 +train: [8] Total time: 0:03:02 (0.4562 s / it) +train: [8] Summary: lr: 0.000250 loss: 2.4682 (2.4467) grad: 0.2559 (0.2589) +eval (validation): [8] [ 0/85] eta: 0:04:20 time: 3.0705 data: 2.8344 max mem: 22448 +eval (validation): [8] [20/85] eta: 0:00:30 time: 0.3388 data: 0.0047 max mem: 22448 +eval (validation): [8] [40/85] eta: 0:00:17 time: 0.3267 data: 0.0033 max mem: 22448 +eval (validation): [8] [60/85] eta: 0:00:09 time: 0.3263 data: 0.0038 max mem: 22448 +eval (validation): [8] [80/85] eta: 0:00:01 time: 0.3286 data: 0.0041 max mem: 22448 +eval (validation): [8] [84/85] eta: 0:00:00 time: 0.3221 data: 0.0041 max mem: 22448 +eval (validation): [8] Total time: 0:00:30 (0.3645 s / it) +cv: [8] best hparam: (0.61, 1.0) (021) ('021_lr6.1e-01_wd1.0e+00') loss: 2.425 acc: 0.273 f1: 0.219 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [9] [ 0/400] eta: 0:20:22 lr: nan time: 3.0558 data: 2.6789 max mem: 22448 +train: [9] [ 20/400] eta: 0:03:30 lr: 0.000249 loss: 2.3952 (2.4026) grad: 0.2570 (0.2663) time: 0.4287 data: 0.0040 max mem: 22448 +train: [9] [ 40/400] eta: 0:02:59 lr: 0.000248 loss: 2.4358 (2.4165) grad: 0.2559 (0.2595) time: 0.4413 data: 0.0046 max mem: 22448 +train: [9] [ 60/400] eta: 0:02:43 lr: 0.000247 loss: 2.4066 (2.4064) grad: 0.2533 (0.2565) time: 0.4429 data: 0.0047 max mem: 22448 +train: [9] [ 80/400] eta: 0:02:30 lr: 0.000246 loss: 2.3907 (2.4112) grad: 0.2582 (0.2582) time: 0.4358 data: 0.0046 max mem: 22448 +train: [9] [100/400] eta: 0:02:18 lr: 0.000244 loss: 2.3923 (2.4091) grad: 0.2631 (0.2594) time: 0.4362 data: 0.0046 max mem: 22448 +train: [9] [120/400] eta: 0:02:08 lr: 0.000243 loss: 2.3892 (2.4084) grad: 0.2583 (0.2594) time: 0.4437 data: 0.0049 max mem: 22448 +train: [9] [140/400] eta: 0:01:58 lr: 0.000242 loss: 2.4039 (2.4097) grad: 0.2584 (0.2606) time: 0.4350 data: 0.0046 max mem: 22448 +train: [9] [160/400] eta: 0:01:49 lr: 0.000241 loss: 2.4039 (2.4060) grad: 0.2665 (0.2615) time: 0.4533 data: 0.0048 max mem: 22448 +train: [9] [180/400] eta: 0:01:39 lr: 0.000240 loss: 2.3904 (2.4087) grad: 0.2592 (0.2624) time: 0.4327 data: 0.0044 max mem: 22448 +train: [9] [200/400] eta: 0:01:30 lr: 0.000238 loss: 2.3700 (2.4044) grad: 0.2651 (0.2627) time: 0.4389 data: 0.0048 max mem: 22448 +train: [9] [220/400] eta: 0:01:21 lr: 0.000237 loss: 2.3624 (2.4018) grad: 0.2680 (0.2636) time: 0.4311 data: 0.0045 max mem: 22448 +train: [9] [240/400] eta: 0:01:12 lr: 0.000236 loss: 2.4254 (2.4066) grad: 0.2654 (0.2633) time: 0.4800 data: 0.0050 max mem: 22448 +train: [9] [260/400] eta: 0:01:03 lr: 0.000234 loss: 2.4254 (2.4048) grad: 0.2522 (0.2625) time: 0.4357 data: 0.0048 max mem: 22448 +train: [9] [280/400] eta: 0:00:54 lr: 0.000233 loss: 2.3510 (2.4035) grad: 0.2522 (0.2622) time: 0.4369 data: 0.0047 max mem: 22448 +train: [9] [300/400] eta: 0:00:44 lr: 0.000232 loss: 2.3782 (2.4029) grad: 0.2600 (0.2624) time: 0.4324 data: 0.0048 max mem: 22448 +train: [9] [320/400] eta: 0:00:35 lr: 0.000230 loss: 2.3935 (2.4052) grad: 0.2609 (0.2624) time: 0.4416 data: 0.0048 max mem: 22448 +train: [9] [340/400] eta: 0:00:26 lr: 0.000229 loss: 2.3935 (2.4030) grad: 0.2673 (0.2626) time: 0.4342 data: 0.0047 max mem: 22448 +train: [9] [360/400] eta: 0:00:17 lr: 0.000228 loss: 2.4040 (2.4028) grad: 0.2677 (0.2632) time: 0.4272 data: 0.0044 max mem: 22448 +train: [9] [380/400] eta: 0:00:08 lr: 0.000226 loss: 2.3741 (2.4009) grad: 0.2665 (0.2632) time: 0.4298 data: 0.0046 max mem: 22448 +train: [9] [399/400] eta: 0:00:00 lr: 0.000225 loss: 2.3835 (2.4014) grad: 0.2639 (0.2634) time: 0.4290 data: 0.0045 max mem: 22448 +train: [9] Total time: 0:02:58 (0.4454 s / it) +train: [9] Summary: lr: 0.000225 loss: 2.3835 (2.4014) grad: 0.2639 (0.2634) +eval (validation): [9] [ 0/85] eta: 0:04:51 time: 3.4272 data: 3.1383 max mem: 22448 +eval (validation): [9] [20/85] eta: 0:00:32 time: 0.3486 data: 0.0032 max mem: 22448 +eval (validation): [9] [40/85] eta: 0:00:18 time: 0.3363 data: 0.0039 max mem: 22448 +eval (validation): [9] [60/85] eta: 0:00:09 time: 0.3271 data: 0.0038 max mem: 22448 +eval (validation): [9] [80/85] eta: 0:00:01 time: 0.3211 data: 0.0039 max mem: 22448 +eval (validation): [9] [84/85] eta: 0:00:00 time: 0.3144 data: 0.0038 max mem: 22448 +eval (validation): [9] Total time: 0:00:31 (0.3714 s / it) +cv: [9] best hparam: (0.52, 1.0) (020) ('020_lr5.2e-01_wd1.0e+00') loss: 2.382 acc: 0.285 f1: 0.223 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [10] [ 0/400] eta: 0:21:03 lr: nan time: 3.1578 data: 2.7821 max mem: 22448 +train: [10] [ 20/400] eta: 0:03:36 lr: 0.000224 loss: 2.3219 (2.3212) grad: 0.2694 (0.2680) time: 0.4408 data: 0.0041 max mem: 22448 +train: [10] [ 40/400] eta: 0:03:00 lr: 0.000222 loss: 2.3219 (2.3223) grad: 0.2579 (0.2621) time: 0.4318 data: 0.0049 max mem: 22448 +train: [10] [ 60/400] eta: 0:02:43 lr: 0.000221 loss: 2.3426 (2.3430) grad: 0.2556 (0.2605) time: 0.4378 data: 0.0050 max mem: 22448 +train: [10] [ 80/400] eta: 0:02:30 lr: 0.000220 loss: 2.3117 (2.3307) grad: 0.2501 (0.2584) time: 0.4350 data: 0.0048 max mem: 22448 +train: [10] [100/400] eta: 0:02:18 lr: 0.000218 loss: 2.2841 (2.3259) grad: 0.2506 (0.2581) time: 0.4293 data: 0.0047 max mem: 22448 +train: [10] [120/400] eta: 0:02:08 lr: 0.000217 loss: 2.3140 (2.3254) grad: 0.2543 (0.2579) time: 0.4348 data: 0.0048 max mem: 22448 +train: [10] [140/400] eta: 0:01:58 lr: 0.000215 loss: 2.3382 (2.3274) grad: 0.2543 (0.2578) time: 0.4344 data: 0.0047 max mem: 22448 +train: [10] [160/400] eta: 0:01:49 lr: 0.000214 loss: 2.3662 (2.3300) grad: 0.2473 (0.2573) time: 0.4662 data: 0.0050 max mem: 22448 +train: [10] [180/400] eta: 0:01:40 lr: 0.000213 loss: 2.3438 (2.3310) grad: 0.2545 (0.2576) time: 0.4520 data: 0.0048 max mem: 22448 +train: [10] [200/400] eta: 0:01:30 lr: 0.000211 loss: 2.3438 (2.3319) grad: 0.2589 (0.2578) time: 0.4400 data: 0.0048 max mem: 22448 +train: [10] [220/400] eta: 0:01:21 lr: 0.000210 loss: 2.3394 (2.3321) grad: 0.2615 (0.2580) time: 0.4188 data: 0.0046 max mem: 22448 +train: [10] [240/400] eta: 0:01:12 lr: 0.000208 loss: 2.3148 (2.3316) grad: 0.2619 (0.2581) time: 0.4477 data: 0.0050 max mem: 22448 +train: [10] [260/400] eta: 0:01:02 lr: 0.000207 loss: 2.3475 (2.3321) grad: 0.2619 (0.2586) time: 0.4280 data: 0.0052 max mem: 22448 +train: [10] [280/400] eta: 0:00:53 lr: 0.000205 loss: 2.3652 (2.3322) grad: 0.2610 (0.2589) time: 0.4340 data: 0.0047 max mem: 22448 +train: [10] [300/400] eta: 0:00:44 lr: 0.000204 loss: 2.2810 (2.3287) grad: 0.2598 (0.2589) time: 0.4239 data: 0.0044 max mem: 22448 +train: [10] [320/400] eta: 0:00:35 lr: 0.000202 loss: 2.2631 (2.3261) grad: 0.2622 (0.2593) time: 0.4239 data: 0.0047 max mem: 22448 +train: [10] [340/400] eta: 0:00:26 lr: 0.000201 loss: 2.3176 (2.3269) grad: 0.2619 (0.2595) time: 0.4215 data: 0.0048 max mem: 22448 +train: [10] [360/400] eta: 0:00:17 lr: 0.000199 loss: 2.3018 (2.3260) grad: 0.2566 (0.2594) time: 0.4220 data: 0.0047 max mem: 22448 +train: [10] [380/400] eta: 0:00:08 lr: 0.000198 loss: 2.2882 (2.3246) grad: 0.2577 (0.2596) time: 0.4225 data: 0.0048 max mem: 22448 +train: [10] [399/400] eta: 0:00:00 lr: 0.000196 loss: 2.3179 (2.3267) grad: 0.2634 (0.2602) time: 0.4208 data: 0.0048 max mem: 22448 +train: [10] Total time: 0:02:56 (0.4407 s / it) +train: [10] Summary: lr: 0.000196 loss: 2.3179 (2.3267) grad: 0.2634 (0.2602) +eval (validation): [10] [ 0/85] eta: 0:04:27 time: 3.1514 data: 2.8665 max mem: 22448 +eval (validation): [10] [20/85] eta: 0:00:31 time: 0.3462 data: 0.0037 max mem: 22448 +eval (validation): [10] [40/85] eta: 0:00:18 time: 0.3185 data: 0.0039 max mem: 22448 +eval (validation): [10] [60/85] eta: 0:00:09 time: 0.3212 data: 0.0040 max mem: 22448 +eval (validation): [10] [80/85] eta: 0:00:01 time: 0.3126 data: 0.0041 max mem: 22448 +eval (validation): [10] [84/85] eta: 0:00:00 time: 0.3049 data: 0.0041 max mem: 22448 +eval (validation): [10] Total time: 0:00:30 (0.3592 s / it) +cv: [10] best hparam: (0.52, 1.0) (020) ('020_lr5.2e-01_wd1.0e+00') loss: 2.383 acc: 0.286 f1: 0.226 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [11] [ 0/400] eta: 0:20:58 lr: nan time: 3.1459 data: 2.8225 max mem: 22448 +train: [11] [ 20/400] eta: 0:03:33 lr: 0.000195 loss: 2.2369 (2.2401) grad: 0.2483 (0.2530) time: 0.4334 data: 0.0042 max mem: 22448 +train: [11] [ 40/400] eta: 0:02:59 lr: 0.000193 loss: 2.2668 (2.2655) grad: 0.2546 (0.2548) time: 0.4309 data: 0.0042 max mem: 22448 +train: [11] [ 60/400] eta: 0:02:41 lr: 0.000192 loss: 2.2322 (2.2488) grad: 0.2604 (0.2576) time: 0.4258 data: 0.0049 max mem: 22448 +train: [11] [ 80/400] eta: 0:02:28 lr: 0.000190 loss: 2.2409 (2.2691) grad: 0.2604 (0.2592) time: 0.4284 data: 0.0048 max mem: 22448 +train: [11] [100/400] eta: 0:02:16 lr: 0.000189 loss: 2.3126 (2.2645) grad: 0.2553 (0.2589) time: 0.4285 data: 0.0047 max mem: 22448 +train: [11] [120/400] eta: 0:02:06 lr: 0.000187 loss: 2.2162 (2.2542) grad: 0.2561 (0.2593) time: 0.4308 data: 0.0047 max mem: 22448 +train: [11] [140/400] eta: 0:01:56 lr: 0.000186 loss: 2.2509 (2.2586) grad: 0.2620 (0.2599) time: 0.4259 data: 0.0047 max mem: 22448 +train: [11] [160/400] eta: 0:01:48 lr: 0.000184 loss: 2.2826 (2.2649) grad: 0.2662 (0.2612) time: 0.4719 data: 0.0050 max mem: 22448 +train: [11] [180/400] eta: 0:01:39 lr: 0.000183 loss: 2.2770 (2.2674) grad: 0.2690 (0.2632) time: 0.4414 data: 0.0049 max mem: 22448 +train: [11] [200/400] eta: 0:01:30 lr: 0.000181 loss: 2.2824 (2.2706) grad: 0.2742 (0.2635) time: 0.4527 data: 0.0047 max mem: 22448 +train: [11] [220/400] eta: 0:01:20 lr: 0.000180 loss: 2.3402 (2.2777) grad: 0.2616 (0.2631) time: 0.4212 data: 0.0047 max mem: 22448 +train: [11] [240/400] eta: 0:01:11 lr: 0.000178 loss: 2.3450 (2.2801) grad: 0.2580 (0.2633) time: 0.4700 data: 0.0047 max mem: 22448 +train: [11] [260/400] eta: 0:01:02 lr: 0.000177 loss: 2.2753 (2.2792) grad: 0.2647 (0.2639) time: 0.4342 data: 0.0047 max mem: 22448 +train: [11] [280/400] eta: 0:00:53 lr: 0.000175 loss: 2.2698 (2.2795) grad: 0.2694 (0.2642) time: 0.4449 data: 0.0051 max mem: 22448 +train: [11] [300/400] eta: 0:00:44 lr: 0.000174 loss: 2.2961 (2.2828) grad: 0.2670 (0.2646) time: 0.4356 data: 0.0047 max mem: 22448 +train: [11] [320/400] eta: 0:00:35 lr: 0.000172 loss: 2.3101 (2.2831) grad: 0.2707 (0.2652) time: 0.4297 data: 0.0048 max mem: 22448 +train: [11] [340/400] eta: 0:00:26 lr: 0.000170 loss: 2.2554 (2.2832) grad: 0.2741 (0.2658) time: 0.4284 data: 0.0048 max mem: 22448 +train: [11] [360/400] eta: 0:00:17 lr: 0.000169 loss: 2.2795 (2.2827) grad: 0.2634 (0.2661) time: 0.4343 data: 0.0049 max mem: 22448 +train: [11] [380/400] eta: 0:00:08 lr: 0.000167 loss: 2.2650 (2.2810) grad: 0.2634 (0.2660) time: 0.4282 data: 0.0047 max mem: 22448 +train: [11] [399/400] eta: 0:00:00 lr: 0.000166 loss: 2.2745 (2.2827) grad: 0.2589 (0.2656) time: 0.4448 data: 0.0048 max mem: 22448 +train: [11] Total time: 0:02:57 (0.4444 s / it) +train: [11] Summary: lr: 0.000166 loss: 2.2745 (2.2827) grad: 0.2589 (0.2656) +eval (validation): [11] [ 0/85] eta: 0:04:29 time: 3.1737 data: 2.8970 max mem: 22448 +eval (validation): [11] [20/85] eta: 0:00:32 time: 0.3606 data: 0.0045 max mem: 22448 +eval (validation): [11] [40/85] eta: 0:00:18 time: 0.3316 data: 0.0034 max mem: 22448 +eval (validation): [11] [60/85] eta: 0:00:09 time: 0.3216 data: 0.0039 max mem: 22448 +eval (validation): [11] [80/85] eta: 0:00:01 time: 0.3173 data: 0.0041 max mem: 22448 +eval (validation): [11] [84/85] eta: 0:00:00 time: 0.3115 data: 0.0039 max mem: 22448 +eval (validation): [11] Total time: 0:00:31 (0.3677 s / it) +cv: [11] best hparam: (0.38, 1.0) (018) ('018_lr3.8e-01_wd1.0e+00') loss: 2.396 acc: 0.285 f1: 0.223 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [12] [ 0/400] eta: 0:21:13 lr: nan time: 3.1849 data: 2.7978 max mem: 22448 +train: [12] [ 20/400] eta: 0:03:41 lr: 0.000164 loss: 2.1565 (2.1681) grad: 0.2502 (0.2502) time: 0.4515 data: 0.0038 max mem: 22448 +train: [12] [ 40/400] eta: 0:03:02 lr: 0.000163 loss: 2.1866 (2.1835) grad: 0.2527 (0.2562) time: 0.4289 data: 0.0049 max mem: 22448 +train: [12] [ 60/400] eta: 0:02:43 lr: 0.000161 loss: 2.1887 (2.1842) grad: 0.2548 (0.2558) time: 0.4306 data: 0.0049 max mem: 22448 +train: [12] [ 80/400] eta: 0:02:30 lr: 0.000160 loss: 2.1887 (2.1931) grad: 0.2541 (0.2567) time: 0.4310 data: 0.0048 max mem: 22448 +train: [12] [100/400] eta: 0:02:19 lr: 0.000158 loss: 2.2145 (2.1983) grad: 0.2606 (0.2578) time: 0.4421 data: 0.0047 max mem: 22448 +train: [12] [120/400] eta: 0:02:08 lr: 0.000156 loss: 2.2145 (2.2032) grad: 0.2573 (0.2567) time: 0.4327 data: 0.0046 max mem: 22448 +train: [12] [140/400] eta: 0:01:58 lr: 0.000155 loss: 2.2230 (2.2026) grad: 0.2559 (0.2590) time: 0.4362 data: 0.0046 max mem: 22448 +train: [12] [160/400] eta: 0:01:49 lr: 0.000153 loss: 2.1949 (2.2038) grad: 0.2710 (0.2610) time: 0.4767 data: 0.0052 max mem: 22448 +train: [12] [180/400] eta: 0:01:40 lr: 0.000152 loss: 2.2053 (2.2043) grad: 0.2710 (0.2617) time: 0.4432 data: 0.0051 max mem: 22448 +train: [12] [200/400] eta: 0:01:30 lr: 0.000150 loss: 2.2237 (2.2122) grad: 0.2637 (0.2622) time: 0.4367 data: 0.0048 max mem: 22448 +train: [12] [220/400] eta: 0:01:21 lr: 0.000149 loss: 2.2731 (2.2169) grad: 0.2634 (0.2623) time: 0.4406 data: 0.0046 max mem: 22448 +train: [12] [240/400] eta: 0:01:12 lr: 0.000147 loss: 2.2125 (2.2133) grad: 0.2649 (0.2636) time: 0.4542 data: 0.0048 max mem: 22448 +train: [12] [260/400] eta: 0:01:03 lr: 0.000145 loss: 2.1867 (2.2146) grad: 0.2697 (0.2639) time: 0.4262 data: 0.0049 max mem: 22448 +train: [12] [280/400] eta: 0:00:54 lr: 0.000144 loss: 2.1840 (2.2114) grad: 0.2650 (0.2639) time: 0.4366 data: 0.0048 max mem: 22448 +train: [12] [300/400] eta: 0:00:45 lr: 0.000142 loss: 2.1840 (2.2127) grad: 0.2684 (0.2648) time: 0.4475 data: 0.0050 max mem: 22448 +train: [12] [320/400] eta: 0:00:35 lr: 0.000141 loss: 2.2146 (2.2137) grad: 0.2750 (0.2649) time: 0.4287 data: 0.0048 max mem: 22448 +train: [12] [340/400] eta: 0:00:26 lr: 0.000139 loss: 2.1829 (2.2136) grad: 0.2662 (0.2647) time: 0.4255 data: 0.0049 max mem: 22448 +train: [12] [360/400] eta: 0:00:17 lr: 0.000138 loss: 2.1829 (2.2129) grad: 0.2626 (0.2647) time: 0.4274 data: 0.0050 max mem: 22448 +train: [12] [380/400] eta: 0:00:08 lr: 0.000136 loss: 2.1965 (2.2137) grad: 0.2621 (0.2647) time: 0.4290 data: 0.0049 max mem: 22448 +train: [12] [399/400] eta: 0:00:00 lr: 0.000134 loss: 2.1884 (2.2130) grad: 0.2665 (0.2650) time: 0.4358 data: 0.0047 max mem: 22448 +train: [12] Total time: 0:02:58 (0.4455 s / it) +train: [12] Summary: lr: 0.000134 loss: 2.1884 (2.2130) grad: 0.2665 (0.2650) +eval (validation): [12] [ 0/85] eta: 0:04:23 time: 3.1051 data: 2.8749 max mem: 22448 +eval (validation): [12] [20/85] eta: 0:00:30 time: 0.3385 data: 0.0042 max mem: 22448 +eval (validation): [12] [40/85] eta: 0:00:18 time: 0.3265 data: 0.0031 max mem: 22448 +eval (validation): [12] [60/85] eta: 0:00:09 time: 0.3409 data: 0.0044 max mem: 22448 +eval (validation): [12] [80/85] eta: 0:00:01 time: 0.3206 data: 0.0039 max mem: 22448 +eval (validation): [12] [84/85] eta: 0:00:00 time: 0.3121 data: 0.0039 max mem: 22448 +eval (validation): [12] Total time: 0:00:31 (0.3656 s / it) +cv: [12] best hparam: (0.38, 1.0) (018) ('018_lr3.8e-01_wd1.0e+00') loss: 2.406 acc: 0.279 f1: 0.216 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [13] [ 0/400] eta: 0:20:58 lr: nan time: 3.1451 data: 2.8142 max mem: 22448 +train: [13] [ 20/400] eta: 0:03:32 lr: 0.000133 loss: 2.1479 (2.1763) grad: 0.2533 (0.2603) time: 0.4308 data: 0.0034 max mem: 22448 +train: [13] [ 40/400] eta: 0:02:59 lr: 0.000131 loss: 2.1713 (2.1700) grad: 0.2591 (0.2614) time: 0.4356 data: 0.0047 max mem: 22448 +train: [13] [ 60/400] eta: 0:02:41 lr: 0.000130 loss: 2.1713 (2.1678) grad: 0.2581 (0.2617) time: 0.4282 data: 0.0046 max mem: 22448 +train: [13] [ 80/400] eta: 0:02:28 lr: 0.000128 loss: 2.1566 (2.1697) grad: 0.2558 (0.2621) time: 0.4316 data: 0.0049 max mem: 22448 +train: [13] [100/400] eta: 0:02:17 lr: 0.000127 loss: 2.1534 (2.1639) grad: 0.2498 (0.2601) time: 0.4330 data: 0.0046 max mem: 22448 +train: [13] [120/400] eta: 0:02:06 lr: 0.000125 loss: 2.1482 (2.1584) grad: 0.2527 (0.2606) time: 0.4273 data: 0.0046 max mem: 22448 +train: [13] [140/400] eta: 0:01:57 lr: 0.000124 loss: 2.1956 (2.1706) grad: 0.2699 (0.2619) time: 0.4379 data: 0.0047 max mem: 22448 +train: [13] [160/400] eta: 0:01:48 lr: 0.000122 loss: 2.1883 (2.1677) grad: 0.2679 (0.2629) time: 0.4650 data: 0.0050 max mem: 22448 +train: [13] [180/400] eta: 0:01:39 lr: 0.000120 loss: 2.1668 (2.1714) grad: 0.2655 (0.2633) time: 0.4541 data: 0.0049 max mem: 22448 +train: [13] [200/400] eta: 0:01:30 lr: 0.000119 loss: 2.1315 (2.1658) grad: 0.2597 (0.2630) time: 0.4345 data: 0.0048 max mem: 22448 +train: [13] [220/400] eta: 0:01:21 lr: 0.000117 loss: 2.1315 (2.1657) grad: 0.2606 (0.2635) time: 0.4508 data: 0.0048 max mem: 22448 +train: [13] [240/400] eta: 0:01:12 lr: 0.000116 loss: 2.1595 (2.1650) grad: 0.2624 (0.2638) time: 0.4739 data: 0.0050 max mem: 22448 +train: [13] [260/400] eta: 0:01:03 lr: 0.000114 loss: 2.1433 (2.1666) grad: 0.2605 (0.2632) time: 0.4312 data: 0.0050 max mem: 22448 +train: [13] [280/400] eta: 0:00:53 lr: 0.000113 loss: 2.1290 (2.1633) grad: 0.2525 (0.2626) time: 0.4254 data: 0.0047 max mem: 22448 +train: [13] [300/400] eta: 0:00:44 lr: 0.000111 loss: 2.1336 (2.1622) grad: 0.2516 (0.2616) time: 0.4518 data: 0.0050 max mem: 22448 +train: [13] [320/400] eta: 0:00:35 lr: 0.000110 loss: 2.1790 (2.1645) grad: 0.2604 (0.2622) time: 0.4296 data: 0.0049 max mem: 22448 +train: [13] [340/400] eta: 0:00:26 lr: 0.000108 loss: 2.1646 (2.1638) grad: 0.2574 (0.2616) time: 0.4261 data: 0.0047 max mem: 22448 +train: [13] [360/400] eta: 0:00:17 lr: 0.000107 loss: 2.1318 (2.1638) grad: 0.2604 (0.2619) time: 0.4289 data: 0.0048 max mem: 22448 +train: [13] [380/400] eta: 0:00:08 lr: 0.000105 loss: 2.1641 (2.1633) grad: 0.2664 (0.2622) time: 0.4427 data: 0.0049 max mem: 22448 +train: [13] [399/400] eta: 0:00:00 lr: 0.000104 loss: 2.1659 (2.1641) grad: 0.2603 (0.2623) time: 0.4307 data: 0.0048 max mem: 22448 +train: [13] Total time: 0:02:58 (0.4458 s / it) +train: [13] Summary: lr: 0.000104 loss: 2.1659 (2.1641) grad: 0.2603 (0.2623) +eval (validation): [13] [ 0/85] eta: 0:04:22 time: 3.0870 data: 2.8139 max mem: 22448 +eval (validation): [13] [20/85] eta: 0:00:32 time: 0.3662 data: 0.0175 max mem: 22448 +eval (validation): [13] [40/85] eta: 0:00:18 time: 0.3277 data: 0.0039 max mem: 22448 +eval (validation): [13] [60/85] eta: 0:00:09 time: 0.3302 data: 0.0043 max mem: 22448 +eval (validation): [13] [80/85] eta: 0:00:01 time: 0.3263 data: 0.0041 max mem: 22448 +eval (validation): [13] [84/85] eta: 0:00:00 time: 0.3150 data: 0.0040 max mem: 22448 +eval (validation): [13] Total time: 0:00:31 (0.3709 s / it) +cv: [13] best hparam: (0.38, 1.0) (018) ('018_lr3.8e-01_wd1.0e+00') loss: 2.386 acc: 0.286 f1: 0.223 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [14] [ 0/400] eta: 0:21:12 lr: nan time: 3.1810 data: 2.8136 max mem: 22448 +train: [14] [ 20/400] eta: 0:03:36 lr: 0.000102 loss: 2.0536 (2.0645) grad: 0.2509 (0.2485) time: 0.4396 data: 0.0032 max mem: 22448 +train: [14] [ 40/400] eta: 0:03:00 lr: 0.000101 loss: 2.0810 (2.0812) grad: 0.2435 (0.2506) time: 0.4265 data: 0.0047 max mem: 22448 +train: [14] [ 60/400] eta: 0:02:42 lr: 0.000099 loss: 2.1006 (2.0871) grad: 0.2562 (0.2536) time: 0.4364 data: 0.0046 max mem: 22448 +train: [14] [ 80/400] eta: 0:02:29 lr: 0.000098 loss: 2.1013 (2.0970) grad: 0.2558 (0.2530) time: 0.4319 data: 0.0049 max mem: 22448 +train: [14] [100/400] eta: 0:02:17 lr: 0.000096 loss: 2.1013 (2.1022) grad: 0.2560 (0.2549) time: 0.4273 data: 0.0046 max mem: 22448 +train: [14] [120/400] eta: 0:02:07 lr: 0.000095 loss: 2.0961 (2.0981) grad: 0.2633 (0.2560) time: 0.4346 data: 0.0047 max mem: 22448 +train: [14] [140/400] eta: 0:01:57 lr: 0.000093 loss: 2.0690 (2.0991) grad: 0.2653 (0.2573) time: 0.4375 data: 0.0048 max mem: 22448 +train: [14] [160/400] eta: 0:01:48 lr: 0.000092 loss: 2.0680 (2.0982) grad: 0.2628 (0.2575) time: 0.4573 data: 0.0049 max mem: 22448 +train: [14] [180/400] eta: 0:01:39 lr: 0.000090 loss: 2.0680 (2.0969) grad: 0.2573 (0.2584) time: 0.4430 data: 0.0047 max mem: 22448 +train: [14] [200/400] eta: 0:01:30 lr: 0.000089 loss: 2.0624 (2.0976) grad: 0.2637 (0.2595) time: 0.4358 data: 0.0050 max mem: 22448 +train: [14] [220/400] eta: 0:01:21 lr: 0.000088 loss: 2.0801 (2.0978) grad: 0.2645 (0.2602) time: 0.4480 data: 0.0048 max mem: 22448 +train: [14] [240/400] eta: 0:01:12 lr: 0.000086 loss: 2.0935 (2.1028) grad: 0.2645 (0.2606) time: 0.4795 data: 0.0052 max mem: 22448 +train: [14] [260/400] eta: 0:01:03 lr: 0.000085 loss: 2.1131 (2.1045) grad: 0.2656 (0.2608) time: 0.4375 data: 0.0051 max mem: 22448 +train: [14] [280/400] eta: 0:00:53 lr: 0.000083 loss: 2.1131 (2.1042) grad: 0.2607 (0.2603) time: 0.4210 data: 0.0048 max mem: 22448 +train: [14] [300/400] eta: 0:00:44 lr: 0.000082 loss: 2.1215 (2.1095) grad: 0.2605 (0.2606) time: 0.4468 data: 0.0051 max mem: 22448 +train: [14] [320/400] eta: 0:00:35 lr: 0.000081 loss: 2.1691 (2.1125) grad: 0.2659 (0.2611) time: 0.4310 data: 0.0047 max mem: 22448 +train: [14] [340/400] eta: 0:00:26 lr: 0.000079 loss: 2.1578 (2.1119) grad: 0.2672 (0.2614) time: 0.4304 data: 0.0047 max mem: 22448 +train: [14] [360/400] eta: 0:00:17 lr: 0.000078 loss: 2.0865 (2.1120) grad: 0.2609 (0.2612) time: 0.4286 data: 0.0049 max mem: 22448 +train: [14] [380/400] eta: 0:00:08 lr: 0.000076 loss: 2.0865 (2.1113) grad: 0.2597 (0.2613) time: 0.4353 data: 0.0050 max mem: 22448 +train: [14] [399/400] eta: 0:00:00 lr: 0.000075 loss: 2.0958 (2.1107) grad: 0.2640 (0.2616) time: 0.4282 data: 0.0049 max mem: 22448 +train: [14] Total time: 0:02:58 (0.4453 s / it) +train: [14] Summary: lr: 0.000075 loss: 2.0958 (2.1107) grad: 0.2640 (0.2616) +eval (validation): [14] [ 0/85] eta: 0:04:23 time: 3.1017 data: 2.8238 max mem: 22448 +eval (validation): [14] [20/85] eta: 0:00:31 time: 0.3584 data: 0.0050 max mem: 22448 +eval (validation): [14] [40/85] eta: 0:00:18 time: 0.3268 data: 0.0037 max mem: 22448 +eval (validation): [14] [60/85] eta: 0:00:09 time: 0.3269 data: 0.0039 max mem: 22448 +eval (validation): [14] [80/85] eta: 0:00:01 time: 0.3238 data: 0.0041 max mem: 22448 +eval (validation): [14] [84/85] eta: 0:00:00 time: 0.3147 data: 0.0040 max mem: 22448 +eval (validation): [14] Total time: 0:00:31 (0.3681 s / it) +cv: [14] best hparam: (0.52, 1.0) (020) ('020_lr5.2e-01_wd1.0e+00') loss: 2.411 acc: 0.286 f1: 0.233 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [15] [ 0/400] eta: 0:21:01 lr: nan time: 3.1549 data: 2.7820 max mem: 22448 +train: [15] [ 20/400] eta: 0:03:38 lr: 0.000074 loss: 2.0419 (2.0660) grad: 0.2487 (0.2524) time: 0.4452 data: 0.0041 max mem: 22448 +train: [15] [ 40/400] eta: 0:03:00 lr: 0.000072 loss: 2.0419 (2.0625) grad: 0.2487 (0.2506) time: 0.4267 data: 0.0049 max mem: 22448 +train: [15] [ 60/400] eta: 0:02:42 lr: 0.000071 loss: 2.0428 (2.0603) grad: 0.2514 (0.2520) time: 0.4282 data: 0.0049 max mem: 22448 +train: [15] [ 80/400] eta: 0:02:29 lr: 0.000070 loss: 2.0016 (2.0441) grad: 0.2514 (0.2505) time: 0.4286 data: 0.0050 max mem: 22448 +train: [15] [100/400] eta: 0:02:19 lr: 0.000068 loss: 1.9875 (2.0387) grad: 0.2440 (0.2505) time: 0.4570 data: 0.0047 max mem: 22448 +train: [15] [120/400] eta: 0:02:08 lr: 0.000067 loss: 2.0404 (2.0428) grad: 0.2531 (0.2525) time: 0.4346 data: 0.0048 max mem: 22448 +train: [15] [140/400] eta: 0:01:58 lr: 0.000066 loss: 2.0706 (2.0518) grad: 0.2619 (0.2552) time: 0.4437 data: 0.0049 max mem: 22448 +train: [15] [160/400] eta: 0:01:49 lr: 0.000064 loss: 2.0649 (2.0519) grad: 0.2619 (0.2553) time: 0.4614 data: 0.0051 max mem: 22448 +train: [15] [180/400] eta: 0:01:40 lr: 0.000063 loss: 2.0702 (2.0614) grad: 0.2642 (0.2567) time: 0.4449 data: 0.0051 max mem: 22448 +train: [15] [200/400] eta: 0:01:30 lr: 0.000062 loss: 2.0882 (2.0617) grad: 0.2608 (0.2563) time: 0.4361 data: 0.0048 max mem: 22448 +train: [15] [220/400] eta: 0:01:21 lr: 0.000061 loss: 2.0508 (2.0598) grad: 0.2530 (0.2564) time: 0.4571 data: 0.0050 max mem: 22448 +train: [15] [240/400] eta: 0:01:12 lr: 0.000059 loss: 2.0507 (2.0604) grad: 0.2557 (0.2567) time: 0.4655 data: 0.0051 max mem: 22448 +train: [15] [260/400] eta: 0:01:03 lr: 0.000058 loss: 2.1079 (2.0650) grad: 0.2557 (0.2568) time: 0.4325 data: 0.0050 max mem: 22448 +train: [15] [280/400] eta: 0:00:54 lr: 0.000057 loss: 2.0932 (2.0628) grad: 0.2488 (0.2561) time: 0.4222 data: 0.0047 max mem: 22448 +train: [15] [300/400] eta: 0:00:45 lr: 0.000056 loss: 2.0202 (2.0619) grad: 0.2451 (0.2561) time: 0.4424 data: 0.0048 max mem: 22448 +train: [15] [320/400] eta: 0:00:36 lr: 0.000054 loss: 2.0563 (2.0624) grad: 0.2542 (0.2561) time: 0.4413 data: 0.0047 max mem: 22448 +train: [15] [340/400] eta: 0:00:26 lr: 0.000053 loss: 2.0700 (2.0629) grad: 0.2542 (0.2560) time: 0.4424 data: 0.0047 max mem: 22448 +train: [15] [360/400] eta: 0:00:17 lr: 0.000052 loss: 2.0607 (2.0634) grad: 0.2555 (0.2562) time: 0.4352 data: 0.0046 max mem: 22448 +train: [15] [380/400] eta: 0:00:08 lr: 0.000051 loss: 2.0526 (2.0612) grad: 0.2469 (0.2558) time: 0.4359 data: 0.0048 max mem: 22448 +train: [15] [399/400] eta: 0:00:00 lr: 0.000050 loss: 2.0533 (2.0624) grad: 0.2503 (0.2557) time: 0.4329 data: 0.0048 max mem: 22448 +train: [15] Total time: 0:02:59 (0.4481 s / it) +train: [15] Summary: lr: 0.000050 loss: 2.0533 (2.0624) grad: 0.2503 (0.2557) +eval (validation): [15] [ 0/85] eta: 0:04:26 time: 3.1333 data: 2.8593 max mem: 22448 +eval (validation): [15] [20/85] eta: 0:00:29 time: 0.3220 data: 0.0037 max mem: 22448 +eval (validation): [15] [40/85] eta: 0:00:17 time: 0.3272 data: 0.0034 max mem: 22448 +eval (validation): [15] [60/85] eta: 0:00:09 time: 0.3261 data: 0.0039 max mem: 22448 +eval (validation): [15] [80/85] eta: 0:00:01 time: 0.3187 data: 0.0038 max mem: 22448 +eval (validation): [15] [84/85] eta: 0:00:00 time: 0.3102 data: 0.0037 max mem: 22448 +eval (validation): [15] Total time: 0:00:30 (0.3581 s / it) +cv: [15] best hparam: (0.38, 1.0) (018) ('018_lr3.8e-01_wd1.0e+00') loss: 2.386 acc: 0.289 f1: 0.226 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [16] [ 0/400] eta: 0:21:05 lr: nan time: 3.1647 data: 2.7885 max mem: 22448 +train: [16] [ 20/400] eta: 0:03:32 lr: 0.000048 loss: 1.9752 (2.0308) grad: 0.2400 (0.2422) time: 0.4282 data: 0.0036 max mem: 22448 +train: [16] [ 40/400] eta: 0:02:58 lr: 0.000047 loss: 1.9776 (2.0083) grad: 0.2400 (0.2416) time: 0.4328 data: 0.0046 max mem: 22448 +train: [16] [ 60/400] eta: 0:02:42 lr: 0.000046 loss: 1.9793 (2.0066) grad: 0.2426 (0.2434) time: 0.4419 data: 0.0047 max mem: 22448 +train: [16] [ 80/400] eta: 0:02:29 lr: 0.000045 loss: 2.0110 (2.0182) grad: 0.2508 (0.2461) time: 0.4319 data: 0.0050 max mem: 22448 +train: [16] [100/400] eta: 0:02:18 lr: 0.000044 loss: 2.0110 (2.0150) grad: 0.2513 (0.2470) time: 0.4308 data: 0.0048 max mem: 22448 +train: [16] [120/400] eta: 0:02:08 lr: 0.000043 loss: 2.0113 (2.0163) grad: 0.2509 (0.2476) time: 0.4501 data: 0.0047 max mem: 22448 +train: [16] [140/400] eta: 0:01:59 lr: 0.000042 loss: 2.0173 (2.0211) grad: 0.2471 (0.2478) time: 0.4648 data: 0.0050 max mem: 22448 +train: [16] [160/400] eta: 0:01:49 lr: 0.000041 loss: 2.0320 (2.0273) grad: 0.2498 (0.2495) time: 0.4401 data: 0.0051 max mem: 22448 +train: [16] [180/400] eta: 0:01:40 lr: 0.000040 loss: 2.0379 (2.0269) grad: 0.2568 (0.2496) time: 0.4406 data: 0.0050 max mem: 22448 +train: [16] [200/400] eta: 0:01:30 lr: 0.000039 loss: 2.0108 (2.0235) grad: 0.2474 (0.2490) time: 0.4267 data: 0.0045 max mem: 22448 +train: [16] [220/400] eta: 0:01:22 lr: 0.000038 loss: 2.0108 (2.0255) grad: 0.2422 (0.2488) time: 0.4890 data: 0.0051 max mem: 22448 +train: [16] [240/400] eta: 0:01:12 lr: 0.000036 loss: 2.0265 (2.0263) grad: 0.2467 (0.2495) time: 0.4416 data: 0.0052 max mem: 22448 +train: [16] [260/400] eta: 0:01:03 lr: 0.000035 loss: 2.0274 (2.0288) grad: 0.2546 (0.2499) time: 0.4361 data: 0.0049 max mem: 22448 +train: [16] [280/400] eta: 0:00:54 lr: 0.000034 loss: 2.0274 (2.0287) grad: 0.2535 (0.2500) time: 0.4453 data: 0.0050 max mem: 22448 +train: [16] [300/400] eta: 0:00:45 lr: 0.000033 loss: 2.0219 (2.0293) grad: 0.2512 (0.2501) time: 0.4527 data: 0.0050 max mem: 22448 +train: [16] [320/400] eta: 0:00:36 lr: 0.000032 loss: 2.0797 (2.0333) grad: 0.2553 (0.2506) time: 0.4379 data: 0.0048 max mem: 22448 +train: [16] [340/400] eta: 0:00:27 lr: 0.000031 loss: 2.0257 (2.0317) grad: 0.2484 (0.2505) time: 0.4373 data: 0.0048 max mem: 22448 +train: [16] [360/400] eta: 0:00:18 lr: 0.000031 loss: 1.9961 (2.0309) grad: 0.2479 (0.2508) time: 0.4427 data: 0.0049 max mem: 22448 +train: [16] [380/400] eta: 0:00:09 lr: 0.000030 loss: 2.0261 (2.0301) grad: 0.2524 (0.2512) time: 0.4483 data: 0.0048 max mem: 22448 +train: [16] [399/400] eta: 0:00:00 lr: 0.000029 loss: 2.0363 (2.0302) grad: 0.2535 (0.2517) time: 0.4400 data: 0.0048 max mem: 22448 +train: [16] Total time: 0:03:00 (0.4503 s / it) +train: [16] Summary: lr: 0.000029 loss: 2.0363 (2.0302) grad: 0.2535 (0.2517) +eval (validation): [16] [ 0/85] eta: 0:04:24 time: 3.1115 data: 2.8779 max mem: 22448 +eval (validation): [16] [20/85] eta: 0:00:30 time: 0.3412 data: 0.0098 max mem: 22448 +eval (validation): [16] [40/85] eta: 0:00:18 time: 0.3353 data: 0.0033 max mem: 22448 +eval (validation): [16] [60/85] eta: 0:00:09 time: 0.3328 data: 0.0037 max mem: 22448 +eval (validation): [16] [80/85] eta: 0:00:01 time: 0.3214 data: 0.0039 max mem: 22448 +eval (validation): [16] [84/85] eta: 0:00:00 time: 0.3136 data: 0.0038 max mem: 22448 +eval (validation): [16] Total time: 0:00:31 (0.3666 s / it) +cv: [16] best hparam: (0.38, 1.0) (018) ('018_lr3.8e-01_wd1.0e+00') loss: 2.384 acc: 0.288 f1: 0.225 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [17] [ 0/400] eta: 0:21:01 lr: nan time: 3.1527 data: 2.8231 max mem: 22448 +train: [17] [ 20/400] eta: 0:03:44 lr: 0.000028 loss: 1.9618 (1.9614) grad: 0.2284 (0.2356) time: 0.4621 data: 0.0047 max mem: 22448 +train: [17] [ 40/400] eta: 0:03:07 lr: 0.000027 loss: 1.9624 (1.9797) grad: 0.2398 (0.2409) time: 0.4476 data: 0.0046 max mem: 22448 +train: [17] [ 60/400] eta: 0:02:47 lr: 0.000026 loss: 2.0042 (1.9937) grad: 0.2416 (0.2401) time: 0.4392 data: 0.0048 max mem: 22448 +train: [17] [ 80/400] eta: 0:02:34 lr: 0.000025 loss: 1.9863 (1.9839) grad: 0.2398 (0.2398) time: 0.4456 data: 0.0049 max mem: 22448 +train: [17] [100/400] eta: 0:02:22 lr: 0.000024 loss: 1.9926 (1.9920) grad: 0.2338 (0.2400) time: 0.4411 data: 0.0046 max mem: 22448 +train: [17] [120/400] eta: 0:02:10 lr: 0.000023 loss: 1.9823 (1.9899) grad: 0.2453 (0.2408) time: 0.4369 data: 0.0046 max mem: 22448 +train: [17] [140/400] eta: 0:02:01 lr: 0.000023 loss: 1.9607 (1.9883) grad: 0.2449 (0.2410) time: 0.4617 data: 0.0049 max mem: 22448 +train: [17] [160/400] eta: 0:01:51 lr: 0.000022 loss: 1.9627 (1.9861) grad: 0.2421 (0.2419) time: 0.4622 data: 0.0049 max mem: 22448 +train: [17] [180/400] eta: 0:01:42 lr: 0.000021 loss: 1.9871 (1.9865) grad: 0.2420 (0.2414) time: 0.4499 data: 0.0049 max mem: 22448 +train: [17] [200/400] eta: 0:01:32 lr: 0.000020 loss: 1.9803 (1.9866) grad: 0.2390 (0.2412) time: 0.4502 data: 0.0049 max mem: 22448 +train: [17] [220/400] eta: 0:01:23 lr: 0.000019 loss: 2.0336 (1.9892) grad: 0.2420 (0.2420) time: 0.4886 data: 0.0051 max mem: 22448 +train: [17] [240/400] eta: 0:01:14 lr: 0.000019 loss: 2.0122 (1.9871) grad: 0.2420 (0.2423) time: 0.4473 data: 0.0051 max mem: 22448 +train: [17] [260/400] eta: 0:01:04 lr: 0.000018 loss: 1.9829 (1.9873) grad: 0.2428 (0.2427) time: 0.4249 data: 0.0049 max mem: 22448 +train: [17] [280/400] eta: 0:00:55 lr: 0.000017 loss: 1.9829 (1.9883) grad: 0.2463 (0.2428) time: 0.4571 data: 0.0049 max mem: 22448 +train: [17] [300/400] eta: 0:00:45 lr: 0.000016 loss: 1.9982 (1.9903) grad: 0.2390 (0.2426) time: 0.4397 data: 0.0051 max mem: 22448 +train: [17] [320/400] eta: 0:00:36 lr: 0.000016 loss: 1.9880 (1.9900) grad: 0.2309 (0.2419) time: 0.4421 data: 0.0048 max mem: 22448 +train: [17] [340/400] eta: 0:00:27 lr: 0.000015 loss: 1.9928 (1.9900) grad: 0.2354 (0.2423) time: 0.4274 data: 0.0047 max mem: 22448 +train: [17] [360/400] eta: 0:00:18 lr: 0.000014 loss: 2.0088 (1.9912) grad: 0.2463 (0.2425) time: 0.4562 data: 0.0049 max mem: 22448 +train: [17] [380/400] eta: 0:00:09 lr: 0.000014 loss: 2.0024 (1.9906) grad: 0.2472 (0.2433) time: 0.4571 data: 0.0049 max mem: 22448 +train: [17] [399/400] eta: 0:00:00 lr: 0.000013 loss: 1.9626 (1.9889) grad: 0.2454 (0.2431) time: 0.4463 data: 0.0050 max mem: 22448 +train: [17] Total time: 0:03:02 (0.4564 s / it) +train: [17] Summary: lr: 0.000013 loss: 1.9626 (1.9889) grad: 0.2454 (0.2431) +eval (validation): [17] [ 0/85] eta: 0:04:30 time: 3.1800 data: 2.9443 max mem: 22448 +eval (validation): [17] [20/85] eta: 0:00:31 time: 0.3473 data: 0.0051 max mem: 22448 +eval (validation): [17] [40/85] eta: 0:00:18 time: 0.3368 data: 0.0038 max mem: 22448 +eval (validation): [17] [60/85] eta: 0:00:09 time: 0.3412 data: 0.0041 max mem: 22448 +eval (validation): [17] [80/85] eta: 0:00:01 time: 0.3298 data: 0.0039 max mem: 22448 +eval (validation): [17] [84/85] eta: 0:00:00 time: 0.3225 data: 0.0039 max mem: 22448 +eval (validation): [17] Total time: 0:00:31 (0.3741 s / it) +cv: [17] best hparam: (0.38, 1.0) (018) ('018_lr3.8e-01_wd1.0e+00') loss: 2.379 acc: 0.288 f1: 0.228 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [18] [ 0/400] eta: 0:21:31 lr: nan time: 3.2298 data: 2.8360 max mem: 22448 +train: [18] [ 20/400] eta: 0:03:41 lr: 0.000012 loss: 1.9920 (1.9980) grad: 0.2403 (0.2387) time: 0.4512 data: 0.0050 max mem: 22448 +train: [18] [ 40/400] eta: 0:03:06 lr: 0.000012 loss: 1.9805 (1.9728) grad: 0.2394 (0.2378) time: 0.4495 data: 0.0049 max mem: 22448 +train: [18] [ 60/400] eta: 0:02:47 lr: 0.000011 loss: 1.9326 (1.9695) grad: 0.2446 (0.2415) time: 0.4422 data: 0.0049 max mem: 22448 +train: [18] [ 80/400] eta: 0:02:34 lr: 0.000011 loss: 1.9553 (1.9680) grad: 0.2427 (0.2400) time: 0.4458 data: 0.0052 max mem: 22448 +train: [18] [100/400] eta: 0:02:22 lr: 0.000010 loss: 1.9696 (1.9679) grad: 0.2348 (0.2396) time: 0.4500 data: 0.0050 max mem: 22448 +train: [18] [120/400] eta: 0:02:11 lr: 0.000009 loss: 1.9253 (1.9631) grad: 0.2357 (0.2391) time: 0.4416 data: 0.0050 max mem: 22448 +train: [18] [140/400] eta: 0:02:01 lr: 0.000009 loss: 1.9639 (1.9661) grad: 0.2351 (0.2390) time: 0.4547 data: 0.0049 max mem: 22448 +train: [18] [160/400] eta: 0:01:51 lr: 0.000008 loss: 1.9701 (1.9671) grad: 0.2350 (0.2384) time: 0.4503 data: 0.0049 max mem: 22448 +train: [18] [180/400] eta: 0:01:41 lr: 0.000008 loss: 1.9372 (1.9640) grad: 0.2335 (0.2388) time: 0.4372 data: 0.0047 max mem: 22448 +train: [18] [200/400] eta: 0:01:32 lr: 0.000007 loss: 1.9474 (1.9655) grad: 0.2381 (0.2392) time: 0.4511 data: 0.0049 max mem: 22448 +train: [18] [220/400] eta: 0:01:23 lr: 0.000007 loss: 1.9739 (1.9665) grad: 0.2376 (0.2391) time: 0.4756 data: 0.0050 max mem: 22448 +train: [18] [240/400] eta: 0:01:13 lr: 0.000006 loss: 1.9759 (1.9696) grad: 0.2379 (0.2390) time: 0.4511 data: 0.0049 max mem: 22448 +train: [18] [260/400] eta: 0:01:04 lr: 0.000006 loss: 1.9768 (1.9689) grad: 0.2379 (0.2390) time: 0.4282 data: 0.0046 max mem: 22448 +train: [18] [280/400] eta: 0:00:55 lr: 0.000006 loss: 1.9696 (1.9700) grad: 0.2415 (0.2391) time: 0.4654 data: 0.0049 max mem: 22448 +train: [18] [300/400] eta: 0:00:45 lr: 0.000005 loss: 1.9797 (1.9693) grad: 0.2350 (0.2388) time: 0.4453 data: 0.0049 max mem: 22448 +train: [18] [320/400] eta: 0:00:36 lr: 0.000005 loss: 1.9888 (1.9717) grad: 0.2350 (0.2389) time: 0.4403 data: 0.0049 max mem: 22448 +train: [18] [340/400] eta: 0:00:27 lr: 0.000004 loss: 1.9683 (1.9699) grad: 0.2316 (0.2388) time: 0.4341 data: 0.0050 max mem: 22448 +train: [18] [360/400] eta: 0:00:18 lr: 0.000004 loss: 1.9566 (1.9695) grad: 0.2331 (0.2385) time: 0.4620 data: 0.0049 max mem: 22448 +train: [18] [380/400] eta: 0:00:09 lr: 0.000004 loss: 1.9751 (1.9702) grad: 0.2376 (0.2386) time: 0.4456 data: 0.0047 max mem: 22448 +train: [18] [399/400] eta: 0:00:00 lr: 0.000003 loss: 1.9430 (1.9681) grad: 0.2376 (0.2387) time: 0.4539 data: 0.0047 max mem: 22448 +train: [18] Total time: 0:03:02 (0.4563 s / it) +train: [18] Summary: lr: 0.000003 loss: 1.9430 (1.9681) grad: 0.2376 (0.2387) +eval (validation): [18] [ 0/85] eta: 0:04:21 time: 3.0822 data: 2.7922 max mem: 22448 +eval (validation): [18] [20/85] eta: 0:00:32 time: 0.3678 data: 0.0052 max mem: 22448 +eval (validation): [18] [40/85] eta: 0:00:19 time: 0.3520 data: 0.0035 max mem: 22448 +eval (validation): [18] [60/85] eta: 0:00:10 time: 0.3587 data: 0.0040 max mem: 22448 +eval (validation): [18] [80/85] eta: 0:00:01 time: 0.3359 data: 0.0042 max mem: 22448 +eval (validation): [18] [84/85] eta: 0:00:00 time: 0.3313 data: 0.0042 max mem: 22448 +eval (validation): [18] Total time: 0:00:32 (0.3866 s / it) +cv: [18] best hparam: (0.38, 1.0) (018) ('018_lr3.8e-01_wd1.0e+00') loss: 2.385 acc: 0.286 f1: 0.225 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [19] [ 0/400] eta: 0:20:44 lr: nan time: 3.1119 data: 2.7425 max mem: 22448 +train: [19] [ 20/400] eta: 0:03:37 lr: 0.000003 loss: 1.9595 (1.9634) grad: 0.2261 (0.2319) time: 0.4440 data: 0.0043 max mem: 22448 +train: [19] [ 40/400] eta: 0:03:02 lr: 0.000003 loss: 1.9436 (1.9417) grad: 0.2304 (0.2347) time: 0.4372 data: 0.0048 max mem: 22448 +train: [19] [ 60/400] eta: 0:02:44 lr: 0.000002 loss: 1.9461 (1.9610) grad: 0.2304 (0.2350) time: 0.4427 data: 0.0047 max mem: 22448 +train: [19] [ 80/400] eta: 0:02:31 lr: 0.000002 loss: 1.9892 (1.9667) grad: 0.2329 (0.2346) time: 0.4367 data: 0.0045 max mem: 22448 +train: [19] [100/400] eta: 0:02:20 lr: 0.000002 loss: 1.9648 (1.9667) grad: 0.2359 (0.2353) time: 0.4434 data: 0.0047 max mem: 22448 +train: [19] [120/400] eta: 0:02:09 lr: 0.000002 loss: 1.9721 (1.9692) grad: 0.2388 (0.2362) time: 0.4475 data: 0.0046 max mem: 22448 +train: [19] [140/400] eta: 0:01:59 lr: 0.000001 loss: 1.9893 (1.9679) grad: 0.2377 (0.2359) time: 0.4444 data: 0.0046 max mem: 22448 +train: [19] [160/400] eta: 0:01:50 lr: 0.000001 loss: 1.9465 (1.9640) grad: 0.2305 (0.2352) time: 0.4445 data: 0.0048 max mem: 22448 +train: [19] [180/400] eta: 0:01:40 lr: 0.000001 loss: 1.9383 (1.9604) grad: 0.2350 (0.2361) time: 0.4310 data: 0.0047 max mem: 22448 +train: [19] [200/400] eta: 0:01:30 lr: 0.000001 loss: 1.9423 (1.9603) grad: 0.2371 (0.2358) time: 0.4456 data: 0.0048 max mem: 22448 +train: [19] [220/400] eta: 0:01:21 lr: 0.000001 loss: 1.9423 (1.9581) grad: 0.2304 (0.2354) time: 0.4482 data: 0.0049 max mem: 22448 +train: [19] [240/400] eta: 0:01:12 lr: 0.000001 loss: 1.9200 (1.9566) grad: 0.2304 (0.2355) time: 0.4385 data: 0.0049 max mem: 22448 +train: [19] [260/400] eta: 0:01:03 lr: 0.000000 loss: 1.9200 (1.9544) grad: 0.2356 (0.2352) time: 0.4409 data: 0.0048 max mem: 22448 +train: [19] [280/400] eta: 0:00:54 lr: 0.000000 loss: 1.9532 (1.9574) grad: 0.2314 (0.2350) time: 0.4770 data: 0.0049 max mem: 22448 +train: [19] [300/400] eta: 0:00:45 lr: 0.000000 loss: 1.9731 (1.9574) grad: 0.2310 (0.2348) time: 0.4598 data: 0.0050 max mem: 22448 +train: [19] [320/400] eta: 0:00:36 lr: 0.000000 loss: 1.9576 (1.9604) grad: 0.2333 (0.2352) time: 0.4483 data: 0.0050 max mem: 22448 +train: [19] [340/400] eta: 0:00:27 lr: 0.000000 loss: 1.9944 (1.9627) grad: 0.2333 (0.2351) time: 0.4342 data: 0.0047 max mem: 22448 +train: [19] [360/400] eta: 0:00:18 lr: 0.000000 loss: 1.9807 (1.9625) grad: 0.2311 (0.2349) time: 0.4604 data: 0.0050 max mem: 22448 +train: [19] [380/400] eta: 0:00:09 lr: 0.000000 loss: 1.9807 (1.9627) grad: 0.2335 (0.2349) time: 0.4519 data: 0.0051 max mem: 22448 +train: [19] [399/400] eta: 0:00:00 lr: 0.000000 loss: 1.9979 (1.9650) grad: 0.2341 (0.2350) time: 0.4539 data: 0.0049 max mem: 22448 +train: [19] Total time: 0:03:01 (0.4537 s / it) +train: [19] Summary: lr: 0.000000 loss: 1.9979 (1.9650) grad: 0.2341 (0.2350) +eval (validation): [19] [ 0/85] eta: 0:04:32 time: 3.2081 data: 2.9671 max mem: 22448 +eval (validation): [19] [20/85] eta: 0:00:33 time: 0.3736 data: 0.0042 max mem: 22448 +eval (validation): [19] [40/85] eta: 0:00:19 time: 0.3708 data: 0.0037 max mem: 22448 +eval (validation): [19] [60/85] eta: 0:00:10 time: 0.3261 data: 0.0038 max mem: 22448 +eval (validation): [19] [80/85] eta: 0:00:01 time: 0.3575 data: 0.0043 max mem: 22448 +eval (validation): [19] [84/85] eta: 0:00:00 time: 0.3474 data: 0.0040 max mem: 22448 +eval (validation): [19] Total time: 0:00:33 (0.3918 s / it) +cv: [19] best hparam: (0.38, 1.0) (018) ('018_lr3.8e-01_wd1.0e+00') loss: 2.384 acc: 0.287 f1: 0.226 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +evaluating last checkpoint: experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +eval model info: +{"score": 0.2870062753783684, "hparam": [0.38, 1.0], "hparam_id": 18, "epoch": 19, "is_best": false, "best_score": 0.28903654485049834} +eval (train): [20] [ 0/509] eta: 0:25:43 time: 3.0323 data: 2.7297 max mem: 22448 +eval (train): [20] [ 20/509] eta: 0:04:00 time: 0.3653 data: 0.0040 max mem: 22448 +eval (train): [20] [ 40/509] eta: 0:03:22 time: 0.3696 data: 0.0039 max mem: 22448 +eval (train): [20] [ 60/509] eta: 0:03:03 time: 0.3627 data: 0.0045 max mem: 22448 +eval (train): [20] [ 80/509] eta: 0:02:51 time: 0.3657 data: 0.0044 max mem: 22448 +eval (train): [20] [100/509] eta: 0:02:41 time: 0.3749 data: 0.0041 max mem: 22448 +eval (train): [20] [120/509] eta: 0:02:31 time: 0.3588 data: 0.0042 max mem: 22448 +eval (train): [20] [140/509] eta: 0:02:20 time: 0.3388 data: 0.0038 max mem: 22448 +eval (train): [20] [160/509] eta: 0:02:12 time: 0.3712 data: 0.0044 max mem: 22448 +eval (train): [20] [180/509] eta: 0:02:04 time: 0.3561 data: 0.0038 max mem: 22448 +eval (train): [20] [200/509] eta: 0:01:55 time: 0.3391 data: 0.0038 max mem: 22448 +eval (train): [20] [220/509] eta: 0:01:47 time: 0.3664 data: 0.0043 max mem: 22448 +eval (train): [20] [240/509] eta: 0:01:40 time: 0.3812 data: 0.0046 max mem: 22448 +eval (train): [20] [260/509] eta: 0:01:32 time: 0.3572 data: 0.0041 max mem: 22448 +eval (train): [20] [280/509] eta: 0:01:25 time: 0.3921 data: 0.0046 max mem: 22448 +eval (train): [20] [300/509] eta: 0:01:17 time: 0.3642 data: 0.0041 max mem: 22448 +eval (train): [20] [320/509] eta: 0:01:10 time: 0.3385 data: 0.0038 max mem: 22448 +eval (train): [20] [340/509] eta: 0:01:02 time: 0.3602 data: 0.0043 max mem: 22448 +eval (train): [20] [360/509] eta: 0:00:55 time: 0.3987 data: 0.0046 max mem: 22448 +eval (train): [20] [380/509] eta: 0:00:47 time: 0.3696 data: 0.0041 max mem: 22448 +eval (train): [20] [400/509] eta: 0:00:40 time: 0.4278 data: 0.0785 max mem: 22448 +eval (train): [20] [420/509] eta: 0:00:33 time: 0.3696 data: 0.0327 max mem: 22448 +eval (train): [20] [440/509] eta: 0:00:25 time: 0.3793 data: 0.0037 max mem: 22448 +eval (train): [20] [460/509] eta: 0:00:18 time: 0.3619 data: 0.0042 max mem: 22448 +eval (train): [20] [480/509] eta: 0:00:10 time: 0.3510 data: 0.0041 max mem: 22448 +eval (train): [20] [500/509] eta: 0:00:03 time: 0.3549 data: 0.0041 max mem: 22448 +eval (train): [20] [508/509] eta: 0:00:00 time: 0.3359 data: 0.0039 max mem: 22448 +eval (train): [20] Total time: 0:03:09 (0.3729 s / it) +eval (validation): [20] [ 0/85] eta: 0:04:24 time: 3.1175 data: 2.8553 max mem: 22448 +eval (validation): [20] [20/85] eta: 0:00:31 time: 0.3610 data: 0.0164 max mem: 22448 +eval (validation): [20] [40/85] eta: 0:00:18 time: 0.3482 data: 0.0039 max mem: 22448 +eval (validation): [20] [60/85] eta: 0:00:10 time: 0.3672 data: 0.0035 max mem: 22448 +eval (validation): [20] [80/85] eta: 0:00:01 time: 0.3407 data: 0.0039 max mem: 22448 +eval (validation): [20] [84/85] eta: 0:00:00 time: 0.3287 data: 0.0038 max mem: 22448 +eval (validation): [20] Total time: 0:00:32 (0.3877 s / it) +eval (test): [20] [ 0/85] eta: 0:03:49 time: 2.6985 data: 2.4299 max mem: 22448 +eval (test): [20] [20/85] eta: 0:00:30 time: 0.3555 data: 0.0040 max mem: 22448 +eval (test): [20] [40/85] eta: 0:00:18 time: 0.3405 data: 0.0035 max mem: 22448 +eval (test): [20] [60/85] eta: 0:00:09 time: 0.3322 data: 0.0036 max mem: 22448 +eval (test): [20] [80/85] eta: 0:00:01 time: 0.3305 data: 0.0040 max mem: 22448 +eval (test): [20] [84/85] eta: 0:00:00 time: 0.3197 data: 0.0040 max mem: 22448 +eval (test): [20] Total time: 0:00:31 (0.3684 s / it) +eval (testid): [20] [ 0/82] eta: 0:03:42 time: 2.7118 data: 2.4396 max mem: 22448 +eval (testid): [20] [20/82] eta: 0:00:29 time: 0.3585 data: 0.0049 max mem: 22448 +eval (testid): [20] [40/82] eta: 0:00:16 time: 0.3350 data: 0.0037 max mem: 22448 +eval (testid): [20] [60/82] eta: 0:00:08 time: 0.3309 data: 0.0039 max mem: 22448 +eval (testid): [20] [80/82] eta: 0:00:00 time: 0.3251 data: 0.0041 max mem: 22448 +eval (testid): [20] [81/82] eta: 0:00:00 time: 0.3144 data: 0.0039 max mem: 22448 +eval (testid): [20] Total time: 0:00:30 (0.3675 s / it) +evaluating best checkpoint: experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +eval model info: +{"score": 0.28903654485049834, "hparam": [0.38, 1.0], "hparam_id": 18, "epoch": 15, "is_best": true, "best_score": 0.28903654485049834} +eval (train): [20] [ 0/509] eta: 0:23:39 time: 2.7893 data: 2.5041 max mem: 22448 +eval (train): [20] [ 20/509] eta: 0:04:03 time: 0.3830 data: 0.0137 max mem: 22448 +eval (train): [20] [ 40/509] eta: 0:03:22 time: 0.3615 data: 0.0035 max mem: 22448 +eval (train): [20] [ 60/509] eta: 0:03:00 time: 0.3401 data: 0.0039 max mem: 22448 +eval (train): [20] [ 80/509] eta: 0:02:46 time: 0.3465 data: 0.0038 max mem: 22448 +eval (train): [20] [100/509] eta: 0:02:35 time: 0.3444 data: 0.0040 max mem: 22448 +eval (train): [20] [120/509] eta: 0:02:26 time: 0.3705 data: 0.0044 max mem: 22448 +eval (train): [20] [140/509] eta: 0:02:19 time: 0.3885 data: 0.0046 max mem: 22448 +eval (train): [20] [160/509] eta: 0:02:11 time: 0.3478 data: 0.0042 max mem: 22448 +eval (train): [20] [180/509] eta: 0:02:03 time: 0.3657 data: 0.0044 max mem: 22448 +eval (train): [20] [200/509] eta: 0:01:55 time: 0.3623 data: 0.0044 max mem: 22448 +eval (train): [20] [220/509] eta: 0:01:47 time: 0.3597 data: 0.0040 max mem: 22448 +eval (train): [20] [240/509] eta: 0:01:39 time: 0.3426 data: 0.0038 max mem: 22448 +eval (train): [20] [260/509] eta: 0:01:31 time: 0.3631 data: 0.0042 max mem: 22448 +eval (train): [20] [280/509] eta: 0:01:24 time: 0.3711 data: 0.0042 max mem: 22448 +eval (train): [20] [300/509] eta: 0:01:18 time: 0.4514 data: 0.0977 max mem: 22448 +eval (train): [20] [320/509] eta: 0:01:11 time: 0.4000 data: 0.0511 max mem: 22448 +eval (train): [20] [340/509] eta: 0:01:03 time: 0.3844 data: 0.0039 max mem: 22448 +eval (train): [20] [360/509] eta: 0:00:55 time: 0.3474 data: 0.0041 max mem: 22448 +eval (train): [20] [380/509] eta: 0:00:48 time: 0.3440 data: 0.0038 max mem: 22448 +eval (train): [20] [400/509] eta: 0:00:40 time: 0.3761 data: 0.0048 max mem: 22448 +eval (train): [20] [420/509] eta: 0:00:33 time: 0.3503 data: 0.0042 max mem: 22448 +eval (train): [20] [440/509] eta: 0:00:25 time: 0.3784 data: 0.0045 max mem: 22448 +eval (train): [20] [460/509] eta: 0:00:18 time: 0.3524 data: 0.0042 max mem: 22448 +eval (train): [20] [480/509] eta: 0:00:10 time: 0.3477 data: 0.0040 max mem: 22448 +eval (train): [20] [500/509] eta: 0:00:03 time: 0.3436 data: 0.0039 max mem: 22448 +eval (train): [20] [508/509] eta: 0:00:00 time: 0.3287 data: 0.0039 max mem: 22448 +eval (train): [20] Total time: 0:03:08 (0.3704 s / it) +eval (validation): [20] [ 0/85] eta: 0:04:27 time: 3.1488 data: 2.8497 max mem: 22448 +eval (validation): [20] [20/85] eta: 0:00:32 time: 0.3675 data: 0.0037 max mem: 22448 +eval (validation): [20] [40/85] eta: 0:00:19 time: 0.3639 data: 0.0041 max mem: 22448 +eval (validation): [20] [60/85] eta: 0:00:10 time: 0.3739 data: 0.0045 max mem: 22448 +eval (validation): [20] [80/85] eta: 0:00:01 time: 0.3384 data: 0.0044 max mem: 22448 +eval (validation): [20] [84/85] eta: 0:00:00 time: 0.3301 data: 0.0042 max mem: 22448 +eval (validation): [20] Total time: 0:00:33 (0.3950 s / it) +eval (test): [20] [ 0/85] eta: 0:04:20 time: 3.0630 data: 2.7538 max mem: 22448 +eval (test): [20] [20/85] eta: 0:00:35 time: 0.4182 data: 0.0050 max mem: 22448 +eval (test): [20] [40/85] eta: 0:00:20 time: 0.3718 data: 0.0038 max mem: 22448 +eval (test): [20] [60/85] eta: 0:00:10 time: 0.3551 data: 0.0042 max mem: 22448 +eval (test): [20] [80/85] eta: 0:00:02 time: 0.3723 data: 0.0047 max mem: 22448 +eval (test): [20] [84/85] eta: 0:00:00 time: 0.3615 data: 0.0044 max mem: 22448 +eval (test): [20] Total time: 0:00:34 (0.4104 s / it) +eval (testid): [20] [ 0/82] eta: 0:03:53 time: 2.8458 data: 2.5991 max mem: 22448 +eval (testid): [20] [20/82] eta: 0:00:31 time: 0.3903 data: 0.0044 max mem: 22448 +eval (testid): [20] [40/82] eta: 0:00:18 time: 0.3663 data: 0.0042 max mem: 22448 +eval (testid): [20] [60/82] eta: 0:00:09 time: 0.3688 data: 0.0041 max mem: 22448 +eval (testid): [20] [80/82] eta: 0:00:00 time: 0.3344 data: 0.0041 max mem: 22448 +eval (testid): [20] [81/82] eta: 0:00:00 time: 0.3225 data: 0.0039 max mem: 22448 +eval (testid): [20] Total time: 0:00:32 (0.3956 s / it) +eval results: + +| model | repr | clf | dataset | ckpt | epoch | lr | wd | hparam_id | hparam | split | loss | acc | acc_std | f1 | f1_std | +|:---------|:-------|:------|:-------------|:-------|--------:|---------:|-----:|------------:|:------------|:-----------|-------:|--------:|----------:|--------:|----------:| +| flat_mae | patch | attn | nsd_cococlip | best | 15 | 0.000114 | 0.05 | 18 | [0.38, 1.0] | train | 2.0526 | 0.38455 | 0.0025285 | 0.32767 | 0.002691 | +| flat_mae | patch | attn | nsd_cococlip | best | 15 | 0.000114 | 0.05 | 18 | [0.38, 1.0] | validation | 2.3857 | 0.28904 | 0.0056722 | 0.22604 | 0.0052922 | +| flat_mae | patch | attn | nsd_cococlip | best | 15 | 0.000114 | 0.05 | 18 | [0.38, 1.0] | test | 2.2418 | 0.31633 | 0.0054211 | 0.24595 | 0.0054264 | +| flat_mae | patch | attn | nsd_cococlip | best | 15 | 0.000114 | 0.05 | 18 | [0.38, 1.0] | testid | 2.2627 | 0.30846 | 0.0057881 | 0.25108 | 0.0054748 | + + +done! total time: 1:23:24 diff --git a/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/train_log.json b/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/train_log.json new file mode 100644 index 0000000000000000000000000000000000000000..c7b14b1810d00bf2326cf099669694fbfb6840e7 --- /dev/null +++ b/data_scaling/n1600_1/eval_v2/nsd_cococlip__patch__attn/train_log.json @@ -0,0 +1,20 @@ +{"epoch": 0, "train/lr": 2.987987987987988e-05, "train/loss": 3.1225267338752745, "train/grad": 0.15737947408109904, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.1677392578125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.167196044921875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.166190185546875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.16533203125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.1644970703125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.16334716796875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.162115478515625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.160902099609375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.159361572265625, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.157947998046875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.156578369140625, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.1548828125, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.153482666015625, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.151678466796875, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.15015380859375, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.1490625, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.147821044921875, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.14658447265625, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.145418701171875, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.14446044921875, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.143515625, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.14266845703125, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.1419580078125, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.14126220703125, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.140831298828125, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.140399169921875, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.140181884765625, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.14006103515625, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.140133056640625, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.140225830078125, "train/loss_030_lr2.7e+00_wd1.0e+00": 3.14035888671875, "train/loss_031_lr3.1e+00_wd1.0e+00": 3.140631103515625, "train/loss_032_lr3.7e+00_wd1.0e+00": 3.14090087890625, "train/loss_033_lr4.3e+00_wd1.0e+00": 3.1410516357421874, "train/loss_034_lr5.1e+00_wd1.0e+00": 3.141231689453125, "train/loss_035_lr6.0e+00_wd1.0e+00": 3.14135986328125, "train/loss_036_lr7.1e+00_wd1.0e+00": 3.14147705078125, "train/loss_037_lr8.3e+00_wd1.0e+00": 3.1408038330078125, "train/loss_038_lr9.8e+00_wd1.0e+00": 3.1357070922851564, "train/loss_039_lr1.2e+01_wd1.0e+00": 3.1165841674804686, "train/loss_040_lr1.4e+01_wd1.0e+00": 3.097228698730469, "train/loss_041_lr1.6e+01_wd1.0e+00": 3.076693878173828, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.0501013946533204, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.026643981933594, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.003509979248047, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.9829021453857423, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.963673725128174, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.9451170921325684, "train/loss_048_lr5.0e+01_wd1.0e+00": 2.931847629547119, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02292362215463072, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.022873630807735027, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.022792857331223784, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.022716022687964143, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.022642264743335545, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02254595740698278, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.022443977664224804, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02233773118816316, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.022210729024372995, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02209002744872123, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02198271909262985, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.021842403090558946, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.021729056946933268, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.021591781862080096, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.021485715806484222, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.021404305705800653, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.021321230507455765, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.02124407843220979, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.021177711081691086, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.02112902421504259, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.021083053671754896, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.021049854275770486, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.021024433923885227, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.021006465856917202, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.02099887905176729, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.021000421666540207, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.021008672825992107, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.021021766080521045, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.02104254547506571, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.021065839766524732, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.02107873815111816, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0210815661912784, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.021065405015833677, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.02102868478279561, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.02095618232153356, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.020852384101599456, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.02071333959698677, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.020591797181405126, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.02060911311302334, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.021127177593298255, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.02194213560782373, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.022595896646380426, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0234523140033707, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.024177443739026784, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.02513585634995252, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.02625869182869792, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.027119642817415298, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.02830879911314696, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.02963380207307637, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1551382541656494, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1533899307250977, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.150686264038086, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1482579708099365, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1459927558898926, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1431591510772705, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.140296697616577, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1374621391296387, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1344432830810547, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.131840705871582, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.129763126373291, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.127488374710083, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1258962154388428, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1244094371795654, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.123539686203003, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.1230313777923584, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.1226260662078857, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.122375249862671, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.1222314834594727, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.1222574710845947, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.1224582195281982, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.1228973865509033, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.123713493347168, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.1249325275421143, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.1264150142669678, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.128296375274658, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.1298270225524902, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.131091833114624, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.1320366859436035, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.1314289569854736, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.1294007301330566, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.1269075870513916, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.123408794403076, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.120314836502075, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.117030382156372, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.113952398300171, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.105846643447876, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.0710623264312744, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.9510600566864014, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.76031494140625, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.6900932788848877, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.6342451572418213, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.60929012298584, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.6137311458587646, "validation/loss_044_lr2.6e+01_wd1.0e+00": 2.5764071941375732, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.560702323913574, "validation/loss_046_lr3.6e+01_wd1.0e+00": 2.535959005355835, "validation/loss_047_lr4.3e+01_wd1.0e+00": 2.541729688644409, "validation/loss_048_lr5.0e+01_wd1.0e+00": 2.6207358837127686, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06330749354005168, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06330749354005168, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06293835363602805, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06293835363602805, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06293835363602805, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06312292358803986, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06312292358803986, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06349206349206349, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06349206349206349, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06330749354005168, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06312292358803986, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06459948320413436, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06718346253229975, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.06662975267626431, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.06681432262827612, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.0681063122923588, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.06939830195644149, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.06995201181247693, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.07142857142857142, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.0725359911406423, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.07290513104466592, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.07345884090070137, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.07087486157253599, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.06921373200442968, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.06293835363602805, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.05703211517165006, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.053156146179401995, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.05260243632336656, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.053156146179401995, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.05721668512366187, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.06183093392395718, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.06829088224437062, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.07050572166851236, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.0754891103728313, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.07401255075673681, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.07825765965300849, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.08471760797342193, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.09394610557401255, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.12495385751199704, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.1790328534514581, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.19896640826873385, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.21797711332595054, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.21502399409376152, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.2207456626061277, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.23624953857511996, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.2427094868955334, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.23864894795127353, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.23440383905500184, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.2188999630860096, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.0056293551404493275, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.005641690147044822, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.004940310616597126, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.004938593442243077, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.004941169651654784, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.004954798331015299, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.004954798331015299, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.005217429321686409, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.005424775039937442, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.006491938914346543, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.007346181563368468, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.010070294740594286, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.011760238153263339, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.011460304803672342, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.010881085264276133, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.010365676616418887, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.010389660748993295, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.010091133985340554, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.010651274849441499, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.011659837104428236, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.012573581245928495, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.014632472064527493, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.016357152523558863, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.017472808355854207, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.013762184119304502, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.010440791408855746, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.0071640621248578755, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.006595367432328002, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.00676018866034752, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.009135719887969476, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.009633422385277262, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.010384915950679535, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.010852580988094551, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.016192774481885663, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.019558756610896257, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.023929500845933014, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.025415932836856944, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.034061851909628536, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.06205908434253754, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.10087033061742068, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.11877270714173727, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.13812692619214273, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.1436631811375176, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.1587478246987908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.173432679019306, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.1743861984433217, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.16380462510645585, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.15862714760850513, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.13730109688684836, "id_best": 45, "lr_best": 0.0093, "wd_best": 0.05, "train/loss_best": 2.9829021453857423, "validation/loss_best": 2.560702323913574, "validation/acc_best": 0.2427094868955334, "validation/f1_best": 0.1743861984433217} +{"epoch": 1, "train/lr": 8.993993993993998e-05, "train/loss": 2.953751002550125, "train/grad": 0.18355563715100287, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.14649169921875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.144482421875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.14168212890625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.13932861328125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.137528076171875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.1356005859375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.13376953125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.132454833984375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.13115478515625, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.13013916015625, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.129510498046875, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.128948974609375, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.128516845703125, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.128209228515625, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.1280419921875, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.127938232421875, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.127701416015625, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.12756103515625, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.12732666015625, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.12700927734375, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.126636962890625, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.126287841796875, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.12588134765625, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.12541015625, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.1249609375, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.124176025390625, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.12351806640625, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.12261962890625, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.119024658203125, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.0966131591796877, "train/loss_030_lr2.7e+00_wd1.0e+00": 3.0499917602539064, "train/loss_031_lr3.1e+00_wd1.0e+00": 3.000845489501953, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.9332545471191405, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.8738489532470703, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.80200309753418, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.7404911804199217, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.679804592132568, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.625197410583496, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.5782653999328615, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.545799345970154, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.535031223297119, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.523520040512085, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.5190140056610106, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.523273606300354, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.524249348640442, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.5438807582855225, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.5784681606292725, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.620129424333572, "train/loss_048_lr5.0e+01_wd1.0e+00": 2.668209209442139, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02095817307010293, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020785490605048834, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020548910475336014, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020363202104344964, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.020218439865857363, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.020064420038834213, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.019944288400001824, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.019855244578793646, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.01978428686503321, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.019744046893902124, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.01972562480252236, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.01972304457332939, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.019732061685062946, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.019754644711501897, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.019780233153142035, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.019804736617952585, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.01983735378831625, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.019872390828095377, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.019909150977618993, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.019941070266067982, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.019975348273292184, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.02000513586215675, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.02002738884650171, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.020033079888671635, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.020008147400803863, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.01992849961388856, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.019811656456440688, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.019669646094553173, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.019472138239070774, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.019665162661112845, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.020485564172267913, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.02133705424144864, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.02261662462260574, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.02379174195230007, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.025040294132195412, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.026225394359789787, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.027504839380271734, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.029059217208996416, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.030657032132148744, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.032288816142827276, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.033295232821255925, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.034469560254365206, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.03577660737559199, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.037092189192771914, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.03816365440376103, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.039669069014489654, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.04145557938143611, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.045480943247675897, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.05095288070850074, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.13321590423584, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.131080389022827, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.128459930419922, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1266868114471436, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.125462055206299, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.124379873275757, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.123685598373413, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.123225688934326, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1229209899902344, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.122758388519287, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1226463317871094, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1224563121795654, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1222376823425293, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1218740940093994, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.121464252471924, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.1210217475891113, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.12052059173584, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.119997262954712, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.119615316390991, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.1195220947265625, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.1197164058685303, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.120161533355713, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.1210930347442627, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.122828483581543, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.1255064010620117, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.129249334335327, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.1313014030456543, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.128261089324951, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.0901248455047607, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.8740177154541016, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.7222518920898438, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.6493782997131348, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.5785973072052, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.5453739166259766, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.5381991863250732, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.5489723682403564, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.498741626739502, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.458789587020874, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.4553284645080566, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.445276975631714, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.430194139480591, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.428544759750366, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.473689317703247, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.5033111572265625, "validation/loss_044_lr2.6e+01_wd1.0e+00": 2.6027324199676514, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.6150670051574707, "validation/loss_046_lr3.6e+01_wd1.0e+00": 2.6553938388824463, "validation/loss_047_lr4.3e+01_wd1.0e+00": 2.7946014404296875, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1614201068878174, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06330749354005168, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06330749354005168, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06404577334809892, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06349206349206349, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06662975267626431, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06699889258028793, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.067921742340347, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06607604282022887, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06662975267626431, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.0651531930601698, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.06570690291620525, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.06662975267626431, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.0681063122923588, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.06902916205241787, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.06976744186046512, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.06939830195644149, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.0710594315245478, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.07198228128460686, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.07142857142857142, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.07216685123661867, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.07142857142857142, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.07087486157253599, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.0725359911406423, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.07438169066076043, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.07825765965300849, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.09265411590992986, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.14691768180140274, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.17423403469915097, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.19878183831672203, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.22240679217423404, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.23440383905500184, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.23624953857511996, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.23108157991878922, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.24363233665559247, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.25655223329641935, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.26411960132890366, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.2652270210409745, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.26559616094499816, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.26799557032115173, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2497231450719823, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.24621631598375784, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.2321889996308601, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.2307124400147656, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.21520856404577335, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.19896640826873385, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.16961978589885568, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.005172470760958509, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.0055886271801353395, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.0071586915265459475, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.008025805235682587, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.009266019032674834, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.009980000631016133, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.010340499226912242, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.011161817953602023, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.011193149065741767, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.011298701277238825, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.01070742729850179, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.011515688273922055, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.011564799709925903, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.01190554092100342, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.012665269684084187, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.013963352022804678, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.015227529834369832, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.01616312993979688, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.016595904033000906, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.017721327748655282, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.018384834222373275, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.018559520383777603, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.01793877713010313, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.015596129234669362, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.014660295697753492, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.015185362395772223, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.016178913539923654, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.017759217175268478, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.030116025607827255, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.06652672345660904, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.08842385222850847, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.11626840393820496, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.14932737948434058, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.16397319097780333, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.16835894626493753, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.1619818097774488, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.17305970598239018, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.1899075770727717, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.19834025541077227, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.2005745176833074, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.19518456744519921, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.20624831182038592, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.18123577743114785, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.17258526143794262, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.16810247566436623, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.17478159451538489, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.14489276680314409, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.1394922814940233, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.11180010683404767, "id_best": 41, "lr_best": 0.0048, "wd_best": 0.05, "train/loss_best": 2.523520040512085, "validation/loss_best": 2.428544759750366, "validation/acc_best": 0.26799557032115173, "validation/f1_best": 0.20624831182038592} +{"epoch": 2, "train/lr": 0.00014999999999999988, "train/loss": 2.962794350385666, "train/grad": 0.40556550443172457, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.13543212890625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.134385986328125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.133424072265625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.1328564453125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.132652587890625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.132362060546875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.132237548828125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.132049560546875, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.131964111328125, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.13173583984375, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.13145751953125, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.13107421875, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.1307666015625, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.13020263671875, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.12964599609375, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.12918701171875, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.128409423828125, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.127730712890625, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.126915283203125, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.126104736328125, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.125098876953125, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.123634033203125, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.120889892578125, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.1111981201171877, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.071710510253906, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.9828970336914065, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.8894087219238282, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.804883499145508, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.694900436401367, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.5896636962890627, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.5300409507751467, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.4899481296539308, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.4474860763549806, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.416652774810791, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.3879447841644286, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.369513473510742, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.3597284078598024, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.354664776325226, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.3596968412399293, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.369290580749512, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.3860310220718386, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.4035022974014284, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.4227180671691895, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.444182813167572, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.4961917328834535, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.603175938129425, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.6824561750888822, "train/loss_047_lr4.3e+01_wd1.0e+00": 4.926881608963012, "train/loss_048_lr5.0e+01_wd1.0e+00": 6.5919406187534335, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.019993852428160607, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.01992397792171687, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01986521407496184, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.019845966077409686, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019845734909176828, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.01985595241189003, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.019874987294897437, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.01989558320026845, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.019920589555986225, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.019942670329473913, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.019962723599746823, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.01998892455827445, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02000938203651458, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.020038305106572805, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.020059046726673843, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.020075682974420486, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.020089988075196744, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.020095147690735757, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.02008426111191511, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.02005592825356871, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.01999091221485287, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.019886572519317268, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.01972254308871925, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.019614380151033402, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.020212639276869594, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.02202716071624309, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0236291244905442, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.025122908144257963, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.02715333133004606, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.028929886287078262, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.029954885048791766, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.030578913232311607, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.031502444092184306, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.032267939448356625, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.03321861360222101, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.033957887813448905, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.035038162050768734, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.03629237963818013, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.037724105678498744, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0386826337967068, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.03976615119725466, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0412087183073163, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04375699903815985, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.04448460005223751, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.048065097741782664, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.05584229301661253, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.05914480371400714, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.13405061597004533, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.15914105225354433, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.124715566635132, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.124119281768799, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.123593330383301, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.123323678970337, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.123170852661133, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1230850219726562, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1230404376983643, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.123013734817505, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1230177879333496, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1230642795562744, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.123124837875366, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1233389377593994, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.123533248901367, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1238248348236084, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1240241527557373, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.124093770980835, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.123833417892456, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.1229372024536133, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.121290922164917, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.119107484817505, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.11552357673645, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.1101016998291016, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.0963571071624756, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.022826910018921, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.796452045440674, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.6370127201080322, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.569979429244995, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.5220882892608643, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.47564435005188, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.443286180496216, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.423131227493286, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.416086196899414, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.4106271266937256, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.4150915145874023, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.459040403366089, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.470196008682251, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.4794862270355225, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.477074146270752, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.5114073753356934, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.5231130123138428, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.5021862983703613, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.5473029613494873, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.5699877738952637, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.5292506217956543, "validation/loss_044_lr2.6e+01_wd1.0e+00": 2.607591390609741, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.7779428958892822, "validation/loss_046_lr3.6e+01_wd1.0e+00": 2.873404026031494, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06552233296419344, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06736803248431156, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06921373200442968, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06995201181247693, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06921373200442968, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06829088224437062, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06995201181247693, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.07069029162052418, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.07069029162052418, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.06995201181247693, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.06939830195644149, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.06755260243632337, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.06755260243632337, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.06755260243632337, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.06755260243632337, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.06884459210040605, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.07032115171650055, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.07198228128460686, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.0799187892211148, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.10834256183093392, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.17220376522702105, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.20782576596530086, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2334809892949428, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.24584717607973422, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2593207825765965, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.26541159099298633, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.2692875599852344, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.26965669988925806, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.26947212993724623, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.26836471022517533, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.26375046142488, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.2567368032484312, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.2511997046880768, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2484311554078996, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.23754152823920266, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.23421926910299004, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.23791066814322628, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.2364341085271318, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.23237356958287192, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.25083056478405313, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.23569582871908454, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.2074566260612772, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.19269102990033224, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.009768890865128508, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.0098098005087237, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.009921362121044004, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.00993148452133114, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.00967542587980489, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.009858397559070232, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.010134011474819146, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.01025957195206509, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.010429469324941343, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.011207166053832799, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.01226341431009406, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.013787935986537546, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.013854751452650532, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.013772508218881955, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.01335110513926066, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.012418359912803456, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.012100299453072061, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.0122145798010721, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.012993451644657053, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.01380594592469342, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.0144398537955368, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.014997836379453422, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.017912154156666537, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.03210203196564628, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.08006901328734718, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.11893899081385895, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.15045587146794534, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.16640979010187265, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.1818639053506903, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.18810904319506974, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.19294012881513742, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.19466886249613405, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.19811578895755924, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.20434273066840483, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.20691529430464198, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.20059362313086213, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.19335534938748467, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.1963896624093522, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.18900832604925707, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.18284123507015573, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.1853359518790594, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1708950320709601, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.1666526914419153, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.17617059967011206, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.17745623553727474, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.1686749097808445, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.15492870213612403, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 31, "lr_best": 0.0009299999999999999, "wd_best": 0.05, "train/loss_best": 2.4899481296539308, "validation/loss_best": 2.416086196899414, "validation/acc_best": 0.26965669988925806, "validation/f1_best": 0.19466886249613405} +{"epoch": 3, "train/lr": 0.0002100600600600602, "train/loss": 2.931470857858658, "train/grad": 0.4540255086123943, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.130303955078125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.13004638671875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.129683837890625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.129365234375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.129176025390625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.128790283203125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.128446044921875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.12801025390625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.1274755859375, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.12682373046875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.126304931640625, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.125439453125, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.12464111328125, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.123470458984375, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.122381591796875, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.121307373046875, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.11987548828125, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.11804443359375, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.115091552734375, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.1086395263671873, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.07489013671875, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.9796087646484377, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.8509202575683594, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.718904495239258, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.6113384628295897, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.532292671203613, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.4804975891113283, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.440371208190918, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.3981154632568358, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.361592884063721, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.3371619892120363, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.3159584045410155, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.292825117111206, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.2779287338256835, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.26614248752594, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.268134813308716, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.2794804215431212, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.279706308841705, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.2978103494644166, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.340460879802704, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.3925855779647827, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.426869434118271, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.5242289888858793, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.57735143661499, "train/loss_044_lr2.6e+01_wd1.0e+00": 5.16419304728508, "train/loss_045_lr3.1e+01_wd1.0e+00": 4.906976038217545, "train/loss_046_lr3.6e+01_wd1.0e+00": 5.408660804033279, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.019752014228142798, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.01976213974878192, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01978373933583498, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.01980236925650388, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019816971975378694, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.019834768720902504, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.01985098678153008, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.019866340882144867, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.01988439912442118, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.019901583436876535, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.01991420707665384, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.019928908846341072, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.01993599598761648, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.019936433909460902, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.01992674381006509, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.01990497936028987, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.019857187643647193, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.019774105134420097, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.019644509176723658, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.01951960221864283, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.01992544923443347, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.021744933887384833, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.02435323762707412, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.02688157690688968, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.028779539838433267, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.029765088791027666, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.030708611393347383, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.031564411018043755, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.03246090104803443, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.03336510938592255, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.03411302961409092, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.03492188007570803, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0362463135458529, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.03754500700160861, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.03919476628303528, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0399055920727551, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0410330945905298, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.041325308782979846, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.04261475783772767, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.044716282580047846, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04663927806541324, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.048685003221035, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.05592788992449641, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.05890697997063398, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.17544517101719975, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.118453226685524, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.10792035128921271, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.123152256011963, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1229748725891113, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1227195262908936, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1224567890167236, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1222286224365234, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1218364238739014, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1214001178741455, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.120873212814331, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1201670169830322, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1194369792938232, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1187613010406494, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1177978515625, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.117055654525757, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1160905361175537, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1153690814971924, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.1147990226745605, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.114011526107788, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.112208843231201, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.1053671836853027, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.0742342472076416, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.8904826641082764, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.7039718627929688, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.6090598106384277, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.549044609069824, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.5178451538085938, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.4886903762817383, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.4710068702697754, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.4711601734161377, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.4768872261047363, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.4714276790618896, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.4568653106689453, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.4377281665802, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.4030752182006836, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.4466512203216553, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.5016069412231445, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.5421011447906494, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.596827507019043, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.5340795516967773, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.6075327396392822, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.7364072799682617, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.665431022644043, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.6381728649139404, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.7908501625061035, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.135465621948242, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06718346253229975, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06773717238833518, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06884459210040605, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06976744186046512, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.07069029162052418, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06939830195644149, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.07013658176448874, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.07216685123661867, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.07235142118863049, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.07345884090070137, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.07401255075673681, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.07327427094868956, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.0753045404208195, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.07604282022886674, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.07438169066076043, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.07272056109265411, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.07235142118863049, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.07235142118863049, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.07862679955703211, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.09597637504614248, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.1493170911775563, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.18549280177187155, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2070874861572536, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.22462163159837578, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.23624953857511996, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.24880029531192321, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.24861572535991142, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.25193798449612403, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2541528239202658, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.257844222960502, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.2572905131044666, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2604282022886674, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.2724252491694352, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.26707272056109266, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2561830933923957, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.24437061646363972, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.24455518641565152, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2497231450719823, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2235142118863049, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.20782576596530086, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.23477297895902546, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.2294204503506829, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.22517534145441123, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.19010705057216684, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.01155115980462261, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.011917098146841389, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.012204923693410885, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.012583445199401739, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.012838440222853617, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.01277246851194296, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.01342434092704514, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.01398141782090534, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.013879710507472725, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.014142495224400304, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.014399585960145317, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.014689989255286182, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.016702555865465027, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.017810321029867435, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.018745427889779967, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.01915088763421482, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.019808930631689044, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.020591465560400347, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.026322144419161347, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.03995988767110577, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.07653455026617333, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.10946960607863243, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.13118045880067447, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.14678917365026978, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.16111286494136823, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.1796180594043765, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.18806662897874174, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.19397654818863952, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.19657425071307624, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.20193020954641472, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.19823178052449666, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.1975445395087514, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.1979246234917488, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.1887605670594574, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.18333931472136214, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.1871351681831294, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.18070034285378064, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.19489575897169073, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.18638792123252848, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.16802559998903197, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.17172301345065977, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1782773972266859, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.16707138238110397, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.12783694497395362, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 32, "lr_best": 0.0011099999999999999, "wd_best": 0.05, "train/loss_best": 2.292825117111206, "validation/loss_best": 2.4030752182006836, "validation/acc_best": 0.2724252491694352, "validation/f1_best": 0.1979246234917488} +{"epoch": 4, "train/lr": 0.0002701201201201201, "train/loss": 2.745159590244293, "train/grad": 0.22888885550200938, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.132081298828125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.131741943359375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.1313330078125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.130809326171875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.130400390625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.129827880859375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.1292236328125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.12853759765625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.1276171875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.126597900390625, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.1257470703125, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.12430419921875, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.1230322265625, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.120965576171875, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.118779296875, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.1161669921875, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.1097955322265625, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.0777362060546873, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.9661773681640624, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.8328277587890627, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.68290771484375, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.5898062896728518, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.515026092529297, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.4538194274902345, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.402174415588379, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.35469575881958, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.32094352722168, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.2957632160186767, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.2676917552947997, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.2435345602035524, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.2290789556503294, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.209751453399658, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.1909636211395265, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.1795341539382935, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.179389042854309, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.192687622308731, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.2119580245018007, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.2415675568580626, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.281220918893814, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.3428503119945527, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.428775199651718, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.499850627183914, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.6407448971271514, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.6850103890895842, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.01959559690207243, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.019606283847242594, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01961958846542984, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.01963047807570547, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019639911102131008, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.01964938555844128, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.01965962804853916, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.01967113247141242, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.01968055246863514, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.019689631159417333, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.019692275705747306, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.01968764288816601, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.019670194243080915, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.019622789160348476, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.019546745992265642, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.019450231618247927, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.019313629944808782, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.019604834713973104, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.02165128182154149, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.024417226132936776, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.027350049773231147, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.028882513288408518, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.029965696465224026, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.030746529595926404, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.031640999047085644, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.03246649984270334, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.03327160377055407, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.03383455969393254, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.03470451232045889, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.03544739086180925, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.035920818820595744, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.03686053745448589, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.038457212392240765, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.03989466055296362, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04163979224860668, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.042416609469801185, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.043349558971822265, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04457843361422419, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.04712778156623244, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.04961728850379586, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05380488542839885, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05809209791943431, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.06833501223474742, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.06876063352450729, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.123073101043701, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1229991912841797, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.122877359390259, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1227850914001465, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1226625442504883, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1224632263183594, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1221556663513184, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1217257976531982, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.121023178100586, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1201910972595215, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.119246244430542, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1178197860717773, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.116382122039795, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1142544746398926, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.111678123474121, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.1071362495422363, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.0854108333587646, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.9396486282348633, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.712224006652832, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.6202340126037598, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.5527515411376953, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.519213914871216, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.489189386367798, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.447277069091797, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.416125774383545, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.403183937072754, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.4016306400299072, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.404843807220459, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.415937900543213, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.441565752029419, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.4682319164276123, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.491774320602417, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.470327854156494, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.543682098388672, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.557039737701416, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.52850604057312, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.5230908393859863, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.54254150390625, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.6749753952026367, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.706235408782959, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.77056884765625, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.7701163291931152, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.88462495803833, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.9138669967651367, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06386120339608711, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06441491325212255, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06312292358803986, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06386120339608711, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06404577334809892, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06404577334809892, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06386120339608711, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.067921742340347, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06902916205241787, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06847545219638243, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.07069029162052418, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.0725359911406423, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.07198228128460686, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.07345884090070137, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.08324104835732743, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.13399778516057587, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.18844592100406055, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2081949058693245, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.22554448135843486, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2382798080472499, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.24806201550387597, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.26596530084902176, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2763012181616833, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.28017718715393136, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.27759320782576596, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2768549280177187, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2692875599852344, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.2606127722406792, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.25692137320044295, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.24953857511997046, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.2617201919527501, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.2440014765596161, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.24363233665559247, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.2425249169435216, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.24916943521594684, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2397563676633444, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.22185308231819859, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.22314507198228128, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.22757475083056478, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.22739018087855298, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2203765227021041, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.20948689553340716, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.009174536852764004, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.008918007970215603, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.008024982723181496, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.007902160995102648, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.008254866028039256, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.008663339119646894, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.009221444253764527, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.01004426379721224, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.011562870185497169, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.013811070220683874, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.016089508957785853, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.01586907962842427, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.0170741717067887, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.018009494528745378, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.018764552901323724, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.01987311197465722, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.028944502637963578, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.06234454757922651, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.11282221105895868, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.13180160352434936, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.15106274401925013, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.16566294775665913, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.17993453245395927, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.19756227190279937, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.20682801900177497, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.2131121307060022, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.2138538059266916, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.21550862755432224, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.21330749670482282, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.21048428138899614, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.20553378716401735, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.19540666219684455, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.208749938407394, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.19778298410412518, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.19352473375989776, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.19676046169838282, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.19731296916819785, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.19375387910671882, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.17388604116071657, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.16953687172282295, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.166029593435246, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.16868259392931098, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.15059157010651605, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.14947887434442, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 25, "lr_best": 0.00035999999999999997, "wd_best": 0.05, "train/loss_best": 2.35469575881958, "validation/loss_best": 2.403183937072754, "validation/acc_best": 0.28017718715393136, "validation/f1_best": 0.2131121307060022} +{"epoch": 5, "train/lr": 0.00029891323817685334, "train/loss": 2.6564266228675844, "train/grad": 0.23922759011387826, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.124466552734375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.12398681640625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.12342041015625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.1228125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.1222216796875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.121417236328125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.120484619140625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.119593505859375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.118319091796875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.1170703125, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.115836181640625, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.1137890625, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.111607666015625, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.10716552734375, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.0962677001953125, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.0529751586914062, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.9085382080078124, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.7237754821777345, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.5995121002197266, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.5283834075927736, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.458505172729492, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.402248363494873, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.348208198547363, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.3019134616851806, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.2604838752746583, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.220159034729004, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.187191758155823, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.1644418239593506, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.1319981098175047, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.1088276600837705, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.108949427604675, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.092782292366028, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.0784324955940248, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.0740341091156007, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.086030943393707, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.1115691936016083, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.149805003404617, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.17504288315773, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.2198977887630464, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.3142526918649673, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.3431439006328585, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.4039863646030426, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.4386299300193786, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.553353979587555, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.019874546425417066, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.019884636397473513, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01989947071298957, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.019912566323764622, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019925592602230608, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.019940828876569866, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.019955115308985114, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.019967029495164754, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.01997569587547332, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.01997683775611222, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.019971337784081698, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.01994351643603295, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.019899514890275895, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.019795759445987642, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.01973673402797431, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.020386254847981036, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.023046145560219885, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.026585052022710443, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.028870627507567404, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.02991140735335648, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03127601592801511, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03247998914681375, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03341995617374778, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.034015869460999965, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.03475122122094035, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.03542075769975781, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.036254978263750674, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0367969872802496, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.03728791818022728, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.03777689953334629, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.03848275202326477, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04028270961716771, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.041968686003237964, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.043597824852913615, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04500906346365809, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04600888073444367, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.047857344001531604, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.049054622258991, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.051191650461405516, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05491307782009244, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05422175588086248, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.057407456375658514, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0580387226305902, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.06441110821440815, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.121206283569336, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.120800256729126, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.120170831680298, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1195638179779053, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1189815998077393, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.118203639984131, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.11734938621521, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1164093017578125, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1152021884918213, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1138622760772705, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.112431287765503, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.110018491744995, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1070802211761475, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.098749876022339, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.063857316970825, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.9186482429504395, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.699523448944092, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.59384822845459, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.5262982845306396, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4846489429473877, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4445595741271973, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.4247043132781982, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.418778419494629, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.4129574298858643, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.387951612472534, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.36922287940979, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.375828742980957, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.3814635276794434, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.4265527725219727, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.440481662750244, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.462249994277954, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.500621795654297, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.525090456008911, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.557065725326538, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.6104488372802734, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.695559501647949, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.784498691558838, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.824270248413086, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.8192150592803955, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.7944211959838867, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.8296828269958496, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.826740026473999, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.955418825149536, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.929443836212158, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06662975267626431, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06681432262827612, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06662975267626431, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06699889258028793, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06718346253229975, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.0681063122923588, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.067921742340347, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06884459210040605, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.07124400147655961, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.07456626061277224, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.07918050941306755, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.09671465485418974, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.14119601328903655, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.18290882244370615, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.20930232558139536, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.23089700996677742, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2484311554078996, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2631967515688446, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.26836471022517533, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.26559616094499816, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.268733850129199, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.28146917681801403, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.284422296050203, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2858988556662975, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2779623477297896, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2724252491694352, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.27556293835363604, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.2731635289774825, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.26411960132890366, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.26301218161683276, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.2561830933923957, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.23606496862310816, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.2235142118863049, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.20487264673311184, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.20634920634920634, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.19915097822074565, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.20653377630121816, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.1937984496124031, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.20081210778885197, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.17404946474713917, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.19564414913252123, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.010395976097096638, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.010122913952878088, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.010317177157279435, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.01054650976576806, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.010717407555816641, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.01033138037575636, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.010814369204677867, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.011281804207192, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.01169970959084374, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.011795072340758586, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.012532099671716241, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.013795042980987875, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.014999754380137877, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.018045281508793018, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.03470803988022066, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.06439863056438948, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.09984979397120657, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.12624418697877382, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.1525909700048086, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.17422859394151083, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.19019442083119084, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.19656525331198535, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.1956559173959478, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.19673008197715317, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.20753910872951886, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.21555713082803366, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.21846334473860565, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.21629884176501155, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.21286069938420818, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.21853302162966481, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.2190421243500714, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.20348426256286056, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.20902924199052753, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.2089237164581431, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.1893917423859254, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.1850405170104911, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.17140645028207122, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.1697341962460522, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.16309962599105055, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.17139628694485973, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.16301680768733046, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.16887981589844184, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.15962595503057897, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.16228624233069752, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 26, "lr_best": 0.00041999999999999996, "wd_best": 0.05, "train/loss_best": 2.187191758155823, "validation/loss_best": 2.375828742980957, "validation/acc_best": 0.2858988556662975, "validation/f1_best": 0.21846334473860565} +{"epoch": 6, "train/lr": 0.0002924170901991409, "train/loss": 2.595235539674759, "train/grad": 0.24459418684244155, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.12849853515625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.1279296875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.126986083984375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.1261376953125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.12523681640625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.12404052734375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.1227783203125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.121336669921875, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.11953125, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.11760009765625, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.115552978515625, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.112091064453125, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.10646484375, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.0759820556640625, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.9444561767578126, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.770089111328125, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.6404488372802732, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.548613510131836, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.4722914123535156, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.4151258850097657, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.3534389305114747, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.3055407905578615, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.25636492729187, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.2112221336364746, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.169637107849121, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.128549156188965, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.0940405178070067, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.071787805557251, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.0409490656852722, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.02362535238266, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.0184995245933535, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.9972382688522339, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.9808849483728408, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.9795366680622102, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.9938340395689012, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.0284487688541413, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.0605768471956254, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.1078944063186644, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.1641925239562987, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.245002188682556, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.324056585431099, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.366834595799446, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.4103156554698946, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.533506541252136, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.019844321217387915, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.019847514987923205, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.019852504990994932, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.019857520293444394, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019859326272271573, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.019860214851796627, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.01985745720565319, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.019849612805992366, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.019830195708200337, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.01979811752215028, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.019753327397629618, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.019655714770779015, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.019534200159832836, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.01972201651893556, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.022122020004317164, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.025530016645789147, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.027921927012503147, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.02939388457685709, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.030979309659451247, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03222930647432804, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03338776278309524, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.033922607814893126, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03443490914069116, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03480811164714396, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.03536899061873555, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.036203079456463456, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.037360880570486185, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.038022082522511484, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0391306016780436, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.03995574427768588, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04024178875610232, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.041583707369863984, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0430972701869905, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0446552200242877, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0458914553001523, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04707783095538616, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.047882284615188835, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04933114780113101, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0515590300783515, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05448943542316556, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05688999166712165, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.057473795749247075, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.05827399680390954, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.06285897931084036, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1206789016723633, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1202876567840576, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1196351051330566, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.119067907333374, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.118530035018921, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1177780628204346, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.117002487182617, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.116079330444336, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1148440837860107, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1133384704589844, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1114914417266846, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1070263385772705, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.0962250232696533, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.001464366912842, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.7524776458740234, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.6298203468322754, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.5472571849823, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.4860997200012207, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.444964647293091, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4205238819122314, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.3968002796173096, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.3852875232696533, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.382718324661255, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.3814847469329834, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.3837075233459473, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.4070045948028564, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.4441332817077637, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.4622864723205566, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.490825653076172, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.4887664318084717, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.515780448913574, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.5588431358337402, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.6255862712860107, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.65313458442688, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.671426296234131, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.668501615524292, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.688173770904541, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.716459274291992, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.7880308628082275, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.8165106773376465, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.8454222679138184, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.767810344696045, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.8343582153320312, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.789090156555176, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06884459210040605, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06829088224437062, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06829088224437062, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06866002214839424, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06921373200442968, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06995201181247693, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.07179771133259505, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.07179771133259505, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.07198228128460686, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.07308970099667775, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.07493540051679587, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.0812107788851975, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.08748615725359911, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.12643041712809155, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.17552602436323367, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.20837947582133629, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.23901808785529716, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.25507567368032485, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.26651901070505724, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.27205610926541157, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.27888519748984864, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2796234772978959, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2827611664820967, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2823920265780731, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.28036175710594313, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2763012181616833, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2652270210409745, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.26448874123292726, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.25987449243263194, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.26578073089701, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.25101513473606496, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.24640088593576967, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.23754152823920266, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.24289405684754523, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2454780361757106, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.23772609819121446, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.23588039867109634, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.22296050203026946, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.21539313399778517, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.19564414913252123, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.1969361387966039, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.21373200442967885, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.20284237726098192, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.2203765227021041, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.010388305196627085, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.010283947819567797, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.010247238986295155, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.010447879972554886, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.010399655773924703, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.010944818834872708, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.012843139551172049, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.013966297127777328, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.015610614662650258, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.017378439086015326, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.019537961792881055, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.02624150924982349, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.031467966975982975, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.056236131226570445, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.09171354324479175, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.12807353122313056, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.16162397870020231, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.1808875297789184, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.1964344843645726, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.20378589073365708, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.21170289724540312, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.21679061214816486, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.2232570213088125, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.22319851207878263, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.22328395488908512, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.22127740954100195, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.20879926250666406, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.20854583650416658, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.2022610096988466, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.2103968989396313, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.20300443896009956, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.20199553567038783, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.19195291900653375, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.19483227994084096, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.19416552867341022, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.18906738917019572, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.18907067115691578, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.18410029110634865, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.18680619246361407, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.16826926021681987, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.17669093271826175, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.17971590789876346, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.17266142784161778, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.18384796514048984, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 22, "lr_best": 0.00021599999999999996, "wd_best": 0.05, "train/loss_best": 2.25636492729187, "validation/loss_best": 2.382718324661255, "validation/acc_best": 0.2827611664820967, "validation/f1_best": 0.2232570213088125} +{"epoch": 7, "train/lr": 0.00027969249556793234, "train/loss": 2.5128181660175324, "train/grad": 0.24882434129714967, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.127274169921875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.12650146484375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.125316162109375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.12412109375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.12297119140625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.1215087890625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.119752197265625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.11771484375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.11521240234375, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.11219482421875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.1086669921875, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.099346923828125, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.0670233154296875, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.8704928588867187, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.679040832519531, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.5865097045898438, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.5049649047851563, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.4280907821655275, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.355425548553467, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.3040593338012694, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.249814147949219, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.2046172332763674, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.150956001281738, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.1031513452529906, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.059708013534546, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.013568778038025, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.9742545533180236, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.9507573819160462, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.9191560125350953, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.905413168668747, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.9008547067642212, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.8778087788820266, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.8625439429283142, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.8662534993886948, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.870245926976204, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.9111606460809707, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.9409590071439744, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.9877601450681686, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.0537713050842283, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.1372509455680846, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.187122728228569, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.242693703174591, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.3054903769493102, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.377216775417328, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.01935647019650787, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.01935990573372692, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01936320404522121, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.019364177738316356, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019362517995759845, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.019356824904680252, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.01934428938664496, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.019324192623607815, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.019284329097718, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.01922647378407419, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.01915241469629109, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.019028045400045813, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.01929378751665354, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02276906997896731, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.026650598831474782, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.028120533181354403, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.029484847756102682, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.031045689033344388, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.032826737649738785, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03368274875916541, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.034181228829547765, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03458499548956752, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.035444459011778236, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03618288702331483, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.03702043008990586, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0380581983551383, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.03922738507390022, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.039973835237324236, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.041362615618854764, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04241580003872514, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.043058219458907844, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04455890478566289, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04597010150551796, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.047644768431782725, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04825223669409752, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04857042670249939, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04912687784060836, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05027291029691696, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.052661919798702, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05475857641547918, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.053435223326086995, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.054942538626492024, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.057182025779038664, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.057605012580752374, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.120199680328369, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1197445392608643, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.119119644165039, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1184937953948975, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1178956031799316, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.11710524559021, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.116225242614746, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.115222454071045, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.113741636276245, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1117217540740967, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1084372997283936, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.0940141677856445, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.016876697540283, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.739414691925049, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.613433837890625, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.5430593490600586, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.4846205711364746, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.4389405250549316, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.4073996543884277, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.3910577297210693, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.377293109893799, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.375741958618164, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.3910434246063232, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.4089925289154053, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.4301092624664307, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.464829683303833, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.4962449073791504, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.496358633041382, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.545808792114258, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.5819828510284424, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.6339855194091797, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.666501998901367, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.648045539855957, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.703735828399658, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.70578670501709, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.755873680114746, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.8284215927124023, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.780703544616699, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.8049540519714355, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.9491636753082275, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.0689570903778076, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.025212287902832, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.1068201065063477, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.0098845958709717, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06847545219638243, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06902916205241787, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.07050572166851236, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.07198228128460686, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06995201181247693, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06902916205241787, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.0710594315245478, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.07364341085271318, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.07345884090070137, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.07567368032484312, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.07585825027685493, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.08250276854928018, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.1168327796234773, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.18087855297157623, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.21760797342192692, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2336655592469546, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2513842746400886, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2622739018087855, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2707641196013289, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2742709486895533, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.27870062753783686, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.27999261720191954, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2748246585455888, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2727943890734588, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.26688815060908083, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2617201919527501, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.25378368401624213, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2587670727205611, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.26116648209671467, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.2530454042081949, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.2482465854558878, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2395717977113326, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.23772609819121446, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.2336655592469546, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.24141749723145073, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.2395717977113326, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.2355112587670727, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2334809892949428, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2334809892949428, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.2100406053894426, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.1969361387966039, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.20468807678110004, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.20358065706902917, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.20450350682908822, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.011129222522922174, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.011744416417559194, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.013478643583185485, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.014843427250818053, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.015045314798998517, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.015365393987624335, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.01616169210971576, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.017323752279242818, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.01710944383611687, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.01793413627916485, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.017859471430476483, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.020343625382335803, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.04351624155439057, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.09545244295760781, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.13290215686003418, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.15255882461152712, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.17359631994426108, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.19038512064338278, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.2026759793521351, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.20890313861308138, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.21648346832958187, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.22156771168700962, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.21847042222336077, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.2151180985598928, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.21450693017944047, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.2147022348699489, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.21454093135171423, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.2185064965037639, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.21951368182223172, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.21269697732889417, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.21260212301793993, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.20956108120298603, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.20434791853021697, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.19791796637702652, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.19392643995042338, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.18658424951341854, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.18681752665991533, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.19157042927609988, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.18253785691022337, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.16175358422265487, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.13995577526761013, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.14842407190437798, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.14532267818017552, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.1422187064107843, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 21, "lr_best": 0.00018299999999999998, "wd_best": 0.05, "train/loss_best": 2.2046172332763674, "validation/loss_best": 2.375741958618164, "validation/acc_best": 0.27999261720191954, "validation/f1_best": 0.22156771168700962} +{"epoch": 8, "train/lr": 0.0002612959497061927, "train/loss": 2.446714611053467, "train/grad": 0.25892608307302, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.1233447265625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.122620849609375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.12127197265625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.120050048828125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.118681640625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.116993408203125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.115086669921875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.11288818359375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.109847412109375, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.105611572265625, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.098935546875, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.063548583984375, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.917787780761719, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.676729278564453, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.5710688781738282, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.5009061431884767, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.4303046798706056, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.3593791580200194, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.2919862174987795, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.2431640625, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.1865861892700194, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.136406002044678, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.076960287094116, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.023765640258789, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.974032392501831, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.921013344526291, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.8731848961114883, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.845140934586525, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.8047285765409469, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.7914115154743195, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.7905913555622102, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.7580319845676422, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.740343742966652, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.7360999369621277, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.762783322930336, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.7920320516824721, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.8255916875600815, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.8774820971488952, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.9276625907421112, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.030878232121468, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.130353865623474, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.171563872694969, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.2196246802806856, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.3131670546531677, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020002006953582167, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020006996812298894, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020011749994009734, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020014324598014353, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02001331445761025, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02000738293863833, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.01999260898679495, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.019966529253870248, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.019914728552103043, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.019836159762926398, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.01974818584974855, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.019998642290011047, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.0224601486325264, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02737889379262924, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02939494591206312, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03052043754607439, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.0320031874999404, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03369014950469136, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03515354298986494, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03576038341037929, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03627200528979301, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03670198011212051, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03757985369302332, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03825870842672884, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.03915250844322145, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04050025202333927, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04186680112034082, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.042660812120884656, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.043987135458737614, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04487492745742202, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04548449967056513, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04690314481034875, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04796461205929518, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04890985121950507, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.050678244326263666, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0503042103163898, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05056353054940701, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05145359802991152, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05244594931602478, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05421733625233174, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.055774591360241176, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05554336687549949, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.05610185796394944, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.05812971806153655, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1197235584259033, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.119197130203247, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1183042526245117, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.117443084716797, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.116588830947876, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1153957843780518, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.114020347595215, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1123108863830566, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.109597682952881, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1050124168395996, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.0950891971588135, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.021475076675415, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.804703712463379, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.6210379600524902, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.541050434112549, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.491666793823242, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.4517219066619873, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.4235424995422363, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.408973455429077, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.408241033554077, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4124605655670166, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.4247941970825195, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.445402145385742, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.4744746685028076, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.493701457977295, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.521660089492798, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.5667569637298584, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.578131914138794, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.6213438510894775, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.6230576038360596, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.657534122467041, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.751955270767212, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.8340935707092285, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.859870195388794, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.9815096855163574, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.006429433822632, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.9662461280822754, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.924147844314575, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.9059135913848877, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.907742500305176, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.864534616470337, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.8732244968414307, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.8505635261535645, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.9899544715881348, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06884459210040605, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.0695828719084533, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.07013658176448874, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.07216685123661867, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.07272056109265411, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.07493540051679587, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.0753045404208195, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.07622739018087855, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.07715023994093761, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.0784422296050203, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.08619416758951642, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.11701734957548911, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.16648209671465486, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.20967146548541898, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.23477297895902546, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2499077150239941, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.26135105204872644, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.26854928017718716, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2705795496493171, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.27150239940937615, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2731635289774825, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2733480989294943, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2661498708010336, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.25230712440014763, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2558139534883721, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.25489110372831303, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2497231450719823, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2513842746400886, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.24584717607973422, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.2500922849760059, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.2502768549280177, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.24197120708748615, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.23145071982281284, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.2320044296788483, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2262827611664821, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.21963824289405684, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.22129937246216316, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2248062015503876, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.22185308231819859, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.21483942414174972, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.21705426356589147, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.22056109265411591, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.21631598375784422, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.20616463639719454, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.011679857498220217, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.012955293560699724, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.01335486628101014, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.01466213090707705, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.015624927852781724, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.016997914331064127, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.017912040187485937, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.020076825204235555, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.021701912102568544, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.023241818929734853, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.028407086301493353, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.05251190001869338, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.08196155347008868, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.1295964242769856, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.15992772113430384, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.17719718112207325, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.19402491219012588, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.207216559478522, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.2116100980106194, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.21384127805539752, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.21587733731732114, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.2194795525492144, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.21347554698933144, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.2014149090515367, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.20496509283087327, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.20484881748505912, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.20067569558228326, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.20378524052799096, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.19835396010279552, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.20278979828378133, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.20269909325806426, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.18315401937298872, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.17161404820507128, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.17207477117182757, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.16506492152822275, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.15514244556781479, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.1617947472053663, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.17375618453003136, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.17667544648723768, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.1678337676911743, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.16498029554439125, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.16061356019309414, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.1517829680946575, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.13743749384778606, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 21, "lr_best": 0.00018299999999999998, "wd_best": 0.05, "train/loss_best": 2.136406002044678, "validation/loss_best": 2.4247941970825195, "validation/acc_best": 0.2733480989294943, "validation/f1_best": 0.2194795525492144} +{"epoch": 9, "train/lr": 0.00023803200426111704, "train/loss": 2.4013734781742095, "train/grad": 0.2633777601271868, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.121884765625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.120855712890625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.119344482421875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.117747802734375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.11625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.11422607421875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.11194091796875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.10915771484375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.1049169921875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.0980535888671876, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.081466064453125, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.9609320068359377, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.7474465942382813, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.5990362548828125, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.515084533691406, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.450904655456543, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.381904754638672, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.3142689514160155, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.250994234085083, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.202640943527222, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.1451665592193603, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.092030096054077, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.0271969842910766, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.9719817566871642, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.9202547430992127, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.8593681025505067, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.8056602370738983, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.772886371612549, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.729847601056099, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.7079624754190446, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.705065374970436, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.6700326132774352, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.6550879245996475, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.652717829346657, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.6689370006322861, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.7197261065244676, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.7639173823595047, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.8317042392492295, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.8792695623636246, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.9559976828098298, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.062699513435364, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.121958822607994, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.1880053770542145, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.2613965922594073, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.019894354837015273, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.019895492088980973, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.019895239085890352, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.01989116981625557, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019883577111177147, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.019870664249174295, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.01984453348442912, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.01980604229029268, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.019732939386740325, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.019637404875829816, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.019646675023250283, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.021571536492556335, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.025702121248468757, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.028767403978854418, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03028636833652854, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03141226598992944, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.0329872234351933, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03432662316597998, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03535633028484881, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.035995639516040685, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03670919748023152, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03732928420417011, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.038258572136983277, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03911585833877325, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04014872481115162, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0415563852712512, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04287353571504354, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.043942156806588174, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.045617449656128885, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04649115324020386, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0470271785184741, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0489413751475513, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.050228371154516935, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.051073759868741034, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.05173891592770815, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.051362701561301946, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05193209232762456, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.053017566986382006, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05379435628652573, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05416078509762883, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05453849758952856, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0548903626576066, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.056700179800391194, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.05605695579200983, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1190779209136963, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1184661388397217, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.117518424987793, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1166305541992188, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1157188415527344, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1144611835479736, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.112884044647217, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1108291149139404, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1068947315216064, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.0981147289276123, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.068783760070801, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.8791487216949463, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.68239164352417, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.564723491668701, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.502948045730591, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.4631290435791016, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.4271082878112793, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.398083209991455, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.3821680545806885, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.3799548149108887, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.382451295852661, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.3984792232513428, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.4267592430114746, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.448479652404785, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.4703962802886963, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.516300678253174, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.5934040546417236, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.630215644836426, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.6793978214263916, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.7468650341033936, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.7477293014526367, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.856814384460449, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.942279815673828, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.9398908615112305, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.9183332920074463, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.9348807334899902, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.0021684169769287, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.9843404293060303, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.939784049987793, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.893941879272461, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.7571277618408203, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.7732222080230713, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.7484962940216064, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.71311616897583, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.0695828719084533, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.0710594315245478, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.07198228128460686, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.07142857142857142, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.07198228128460686, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.07161314138058324, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.07235142118863049, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.07475083056478406, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.07788851974898486, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.08213362864525656, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.09726836471022518, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.15614617940199335, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.19416758951642674, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.2201919527500923, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.24473975636766335, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.25802879291251385, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.27002583979328165, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2796234772978959, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2825765965300849, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.28202288667404946, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2853451458102621, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.27888519748984864, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2692875599852344, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2652270210409745, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.262827611664821, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2556293835363603, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.24234034699150978, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2454780361757106, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.23920265780730898, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.23292727943890734, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.2321889996308601, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.22683647102251753, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.2146548541897379, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.21816168327796234, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2201919527500923, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.21926910299003322, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.21373200442967885, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.21520856404577335, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.21207087486157253, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.22757475083056478, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.2368032484311554, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.22683647102251753, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.23311184939091917, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.21908453303802142, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.011419570267422055, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.012273243775356296, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.013141045887341815, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.01396724003688535, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.015671184462977154, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.016102874368151925, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.017751589000283865, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.019552809885560223, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.02246744993663971, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.02557017754313436, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.03617752331023822, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.07418001222742027, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.10899032197313092, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.1384878103871687, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.1646457899679639, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.17934653663517908, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.19640186218293318, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.20717239848027122, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.21292355278597988, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.21647815897384048, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.22279899993431193, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.2229894196077613, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.21866374624766008, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.21852941994156994, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.22236692632890295, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.22034920026895297, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.2102272535047407, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.2177040493513023, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.2127893418410003, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.20457920328098012, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.2025042229817343, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.2003817693440765, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.1824826012245789, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.18723674941821178, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.18865575728462514, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.18463443475915145, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.18090542664781792, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.18137699659042728, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.17611020229907273, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.19179152300581329, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.19524763895944183, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1908293441659504, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.19599230233624285, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.1890940169634965, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 20, "lr_best": 0.000156, "wd_best": 0.05, "train/loss_best": 2.1451665592193603, "validation/loss_best": 2.382451295852661, "validation/acc_best": 0.2853451458102621, "validation/f1_best": 0.22279899993431193} +{"epoch": 10, "train/lr": 0.00021091808096876188, "train/loss": 2.3267429339885712, "train/grad": 0.26016073882579804, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.120220947265625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.11927490234375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.1173828125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.115712890625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.114005126953125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.11169189453125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.108980712890625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.105679931640625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.09977294921875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.086796875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.039813232421875, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.8186215209960936, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.6563258361816406, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.538604431152344, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.4594251251220705, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.395026626586914, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.325585880279541, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.2585321426391602, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.1927260398864745, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.1405415534973145, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.076948194503784, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.0189570140838624, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.9518358278274537, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.8933694934844971, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.8350040078163148, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.7666721093654632, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.7037831437587738, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.659112902879715, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.5995498931407928, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.5686671096086502, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.5700249916315079, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.5266565531492233, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.5017832869291305, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.5010181444883346, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.519092140197754, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.5668399691581727, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.6246892213821411, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.6799248558282853, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.7395382916927338, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.8409952360391617, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.9253370290994645, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.9937254267930984, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.0393262362480162, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.123458881378174, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.01959905216936022, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.01959933533333242, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.0195974929863587, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.019592627026140688, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.01958332019392401, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.01956594465766102, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.019534862278960646, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.01948835584335029, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.019404358197934925, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.01934598248451948, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.01987614523153752, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02363292989321053, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.027230674708262084, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.029388663908466697, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.030895885117352008, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.032224751487374304, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03375950248911977, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03481395219452679, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.035687851840630176, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03636535820551216, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.037101257937029, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03767498468980193, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.038529609721153976, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03933726558461785, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04042547237128019, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04193739462643862, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04324240334331989, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04401854617521167, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04551024541258812, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0466193738207221, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04720652915537357, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04869808692485094, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04985719872638583, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.050985740292817355, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.05176038172096014, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.05165225278586149, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0519987921603024, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05174022123217583, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.052511154413223265, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0533767850138247, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0511653720960021, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.052538330405950545, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.051957259401679036, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.05116779627278447, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1188302040100098, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1182069778442383, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1172337532043457, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1162588596343994, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.115251302719116, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1138198375701904, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.111921548843384, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1092498302459717, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.103236675262451, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.08508563041687, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.0090901851654053, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.7553632259368896, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.622955083847046, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.528409957885742, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.477140426635742, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.4450912475585938, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.417734384536743, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.396373987197876, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.3833160400390625, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.3820407390594482, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.3834962844848633, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.3974289894104004, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.431617259979248, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.4650070667266846, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.5006392002105713, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.5561017990112305, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.6250159740448, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.6514060497283936, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.754045248031616, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.8019635677337646, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.8128445148468018, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.885124921798706, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.9241318702697754, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.992858409881592, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.988755702972412, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.000487804412842, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.0259854793548584, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.9838969707489014, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.9450252056121826, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.1044223308563232, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.037027597427368, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.0156266689300537, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.994537115097046, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.9143221378326416, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06884459210040605, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06939830195644149, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06939830195644149, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06976744186046512, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.07142857142857142, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.07142857142857142, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.07216685123661867, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.0754891103728313, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.07788851974898486, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.09099298634182355, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.12107788851974899, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.1758951642672573, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.2102251753414544, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.2364341085271318, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.25193798449612403, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2633813215208564, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.27648578811369506, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.27888519748984864, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2822074566260613, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2822074566260613, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2857142857142857, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2847914359542267, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.27445551864156514, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.26411960132890366, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.26245847176079734, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2572905131044666, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2467700258397933, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.24603174603174602, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.22886674049464747, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.2277593207825766, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.2353266888150609, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.22702104097452935, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.2262827611664821, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.21668512366186785, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2220376522702104, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.21557770394979697, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.2117017349575489, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.21557770394979697, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2117017349575489, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.19878183831672203, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.1954595791805094, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.20837947582133629, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.20893318567737174, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.21797711332595054, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.012653271563445978, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.013238038524826877, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.014211231662632053, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.0155758898298531, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.017671048521918756, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.017895802869037768, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.018406650302229228, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.020438933498111597, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.021659695703106534, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.03029906508211458, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.05225464934075765, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.09294768402165676, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.12763849991697607, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.15771498611576398, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.1753237522940777, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.19197551233886587, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.2079893786347463, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.2139033231413063, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.21961892866230517, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.22099500857875012, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.22569912400223457, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.23089533303313378, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.22401149745360982, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.21671354511909394, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.21790760917230556, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.21705954746691092, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.21129009056180192, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.2138227408172376, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.1991844027596743, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.19912530991659805, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.2034206752502931, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.19643679976264972, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.19114934718596618, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.18774263894179713, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.18761819964726442, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.1801606016765828, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.178494097304577, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.18269150798309483, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.18012220015001054, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.15426205335488619, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.1559733411355959, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.15880052586078167, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.16254174768312854, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.16968909066740792, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 20, "lr_best": 0.000156, "wd_best": 0.05, "train/loss_best": 2.076948194503784, "validation/loss_best": 2.3834962844848633, "validation/acc_best": 0.2857142857142857, "validation/f1_best": 0.22569912400223457} +{"epoch": 11, "train/lr": 0.00018113997589164516, "train/loss": 2.2826666951179506, "train/grad": 0.2656134931743145, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.1254931640625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.124320068359375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.122623291015625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.120836181640625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.119112548828125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.116697998046875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.113663330078125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.109754638671875, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.1018505859375, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.0772198486328124, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.9799273681640623, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.7324154663085936, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.6143275451660157, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.5096800231933596, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.4324735260009764, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.3684970474243165, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.3017640113830566, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.2370369911193846, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.167321195602417, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.1107874584197996, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.043333134651184, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.982157621383667, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.9095677947998047, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.8458152389526368, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.7831723982095717, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.7072464072704314, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.6391131979227067, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.592952726483345, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.5271693134307862, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.4933975464105607, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.4799390894174576, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.4364307564496994, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.4218398201465607, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.405426515340805, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.4224828559160232, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.4767525720596313, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.5272327834367752, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.5938516008853911, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.6479308307170868, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.73197277367115, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.8298580038547516, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.9074682837724686, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.9581480425596238, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.0422336196899415, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.01969374533277005, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.019690561955794692, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01968539527617395, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.019676343835890293, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019667319692671298, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.019647201066836716, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.01961116180755198, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.01955753756687045, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.019471030221320688, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.019571057143621146, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.021104820417240263, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.025834565702825784, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.028385363686829805, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.030313446475192903, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03203516983427107, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03350645874626935, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03494434722699225, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.035892715062946084, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.036850663814693686, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03759231460280717, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03831700805574655, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03892819406464696, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03994722969830036, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04086914621293545, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04201078783720732, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04347981110215187, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.044851548075675964, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04575554167851806, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04715127035975456, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04865669069811702, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.049212649650871756, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.05057064751163125, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.05196031792089343, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.05203444870188832, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.052551295589655635, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.05282904993742704, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.052160514555871486, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.052522828429937364, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05302605226635933, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05267603920772672, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05064371610060334, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.051910204030573365, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.05058340104296803, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.05069047797471285, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.118561029434204, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1178197860717773, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.11666202545166, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.115478515625, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1143062114715576, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.112555980682373, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.11027193069458, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1067471504211426, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.0975968837738037, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.0611932277679443, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.927659749984741, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.6887667179107666, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.589294672012329, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.5109996795654297, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.468873977661133, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.4426300525665283, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.4219024181365967, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.405712604522705, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.396172285079956, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.3975484371185303, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4051477909088135, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.425116777420044, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.453763961791992, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.4826982021331787, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.520237684249878, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.577623128890991, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.652043342590332, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.6808505058288574, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.7446389198303223, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.772921323776245, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.795276165008545, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.8850185871124268, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.94279408454895, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.0080485343933105, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.9899275302886963, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.9583065509796143, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.9656667709350586, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.8896217346191406, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.873859167098999, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.823007106781006, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.8088290691375732, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.7641184329986572, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.747462034225464, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.761164665222168, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.07013658176448874, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.07142857142857142, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.07161314138058324, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.07235142118863049, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.0725359911406423, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.07345884090070137, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.07493540051679587, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.07807308970099668, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.08490217792543374, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.10834256183093392, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.14285714285714285, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.19158361018826134, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.21853082318198597, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.24215577703949798, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.2541528239202658, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2681801402731635, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.28202288667404946, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2809154669619786, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2853451458102621, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.28165374677002586, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.28054632705795496, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.27740863787375414, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2707641196013289, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.26411960132890366, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2561830933923957, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.25083056478405313, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.24307862679955702, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.24510889627168697, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2440014765596161, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.2454780361757106, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.2395717977113326, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.23717238833517904, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.22554448135843486, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.22406792174234036, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.22978959025470652, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.2320044296788483, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.2264673311184939, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.23772609819121446, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2353266888150609, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.22960502030269472, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.23440383905500184, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.24603174603174602, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.24769287559985234, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.22296050203026946, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.012478093879930673, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.013071263459258122, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.01355167234048067, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.014489963817872606, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.014937788239070519, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.015766941405311242, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.017226333967857287, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.019702117645657675, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.025696765376140573, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.04712176721795446, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.06377967085718833, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.10618539247916135, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.13577713841868452, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.1631243956544129, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.1772957985263188, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.19358874772428525, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.21104378063106313, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.2147661679931284, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.2231370003012527, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.2232632911305531, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.22492734679420465, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.22567825203375236, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.2218462758717458, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.21855035708019224, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.2158484928120554, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.2106229398006961, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.20625556352967855, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.20969999170100104, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.21010213756779625, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.2082879385744508, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.203889986530252, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.20653409954319635, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.19619272873603322, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.19582618576838354, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.20102796704889922, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.19906813601019438, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.1941113366444429, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.2007183237511927, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.20674089506192475, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.20203030553429654, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.1972531941525282, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.202174426123967, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.2057188360021267, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.17539635874055107, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 18, "lr_best": 0.00011399999999999999, "wd_best": 0.05, "train/loss_best": 2.167321195602417, "validation/loss_best": 2.396172285079956, "validation/acc_best": 0.2853451458102621, "validation/f1_best": 0.2231370003012527} +{"epoch": 12, "train/lr": 0.0001499999999999999, "train/loss": 2.2129850006103515, "train/grad": 0.26502676993608476, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.117188720703125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.115791015625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.113624267578125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.111646728515625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.10951904296875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.106541748046875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.102772216796875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.09766845703125, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.08552978515625, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.038033447265625, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.887003173828125, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.6624716186523436, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.562307586669922, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.4636805725097655, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.387219772338867, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.325344486236572, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.2611407089233397, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.1951214599609377, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.1239146518707277, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.065704746246338, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.9952762746810913, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.9308100128173828, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.852935528755188, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.7838362288475036, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.7135748887062072, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.6267799204587936, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.5498415905237197, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.4938694536685944, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.4148804181814194, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.3625910812616349, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.346085368990898, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.2870112997293472, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.2583336186408998, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.2422454369068145, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.2627173164486885, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.3181739947199822, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.3897147423028946, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.4515569108724593, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.4931577426195144, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.595040695667267, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.7049272102117539, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.7827108985185622, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.8538076376914978, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.9347897559404372, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.01967860559467226, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.01967810737900436, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.019674790999852122, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.019666815428063273, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019655444025993347, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.0196316569019109, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.01959127252921462, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.019530205484479667, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.01945710585452616, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.019908917066641153, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.022325345040298997, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02718141905963421, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.029237495362758638, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.03115537405014038, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.033032549917697905, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.034451628644019364, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03567530368454754, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03650550869293511, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03746582201682031, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03817429940216243, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03884187268093228, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.039485238818451764, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.040508227273821834, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04148975994437933, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.042561068795621396, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04390594147145748, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04513467788696289, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04599619559943676, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04716563643887639, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04779608268290758, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04860183227807283, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04975992510095239, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.05119630791246891, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.051578420475125314, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.051734044328331945, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.05145249076187611, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.051953728534281256, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0521155752055347, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05180836282670498, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.052199629880487916, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05072121143341064, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05104844199493527, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.05024672845378518, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.048695582710206506, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1184844970703125, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.11775279045105, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.11655855178833, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1153576374053955, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1141252517700195, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1122453212738037, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.109574794769287, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.105191230773926, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.0918736457824707, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.029198169708252, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.8570008277893066, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.6517722606658936, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.5658600330352783, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.4962501525878906, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.4606761932373047, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.440075397491455, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.422930955886841, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.4099934101104736, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.4058408737182617, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.416576862335205, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4323339462280273, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.4647634029388428, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.51704740524292, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.5723254680633545, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.612541913986206, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.669994592666626, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.753952980041504, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.788708448410034, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.869436502456665, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.8996520042419434, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.9528260231018066, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.0369820594787598, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.122648239135742, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.151068687438965, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.172813653945923, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.155921697616577, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.071129083633423, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.08292818069458, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.010227680206299, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.9648847579956055, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.9196817874908447, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.906588315963745, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.8447482585906982, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.8948097229003906, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06755260243632337, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06847545219638243, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06884459210040605, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06995201181247693, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.07050572166851236, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.07198228128460686, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.07216685123661867, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.07401255075673681, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.08157991878922112, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.1153562200073828, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.15651531930601698, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.19804355850867478, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.21982281284606867, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.24326319675156885, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.2567368032484312, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2705795496493171, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.27593207825765964, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.27593207825765964, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.27888519748984864, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.27740863787375414, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.27150239940937615, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.26891842008121075, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2602436323366556, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.24880029531192321, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2469545957918051, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.24510889627168697, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.24160206718346253, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.24510889627168697, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.24067921742340348, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.24473975636766335, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.2351421188630491, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.23994093761535623, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.2320044296788483, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.21816168327796234, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.22093023255813954, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.21834625322997417, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.22259136212624583, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.22739018087855298, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.22314507198228128, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.23274270948689554, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.22388335179032853, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.2307124400147656, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.23791066814322628, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.2279438907345884, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.011189042889714595, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.011762193513567975, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.012170444620032953, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.012799636394336067, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.012882787958043726, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.013944238183553663, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.015140782142954176, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.017110433164758458, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.02524645563464006, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.05222191258501884, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.07427605055795701, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.11345856162822902, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.13795495265709148, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.16328291291909455, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.18018119526734963, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.2008806658180802, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.20721279473051726, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.21249478868340374, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.21642594581796212, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.21929889674096814, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.2174670648164734, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.21967080917974702, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.21323168263989048, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.20219784742787197, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.20184742118725021, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.19712820864611422, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.1916783975573667, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.19623117801994383, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.19086015571883264, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.19253129761277235, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.18153638399688724, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.19333684873821042, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.1889792441935961, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.1792525507597361, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.1797596633879289, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.1819247604126318, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.18540648714143101, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.18448801922117433, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.18955553509218617, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.19220221576774066, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.18597820871507342, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.18964556579033762, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.2008481987781093, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.18685282965317362, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 18, "lr_best": 0.00011399999999999999, "wd_best": 0.05, "train/loss_best": 2.1239146518707277, "validation/loss_best": 2.4058408737182617, "validation/acc_best": 0.27888519748984864, "validation/f1_best": 0.21642594581796212} +{"epoch": 13, "train/lr": 0.00011886002410835478, "train/loss": 2.164076786637306, "train/grad": 0.2622937012463808, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.121197509765625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.119862060546875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.11752685546875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.115352783203125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.113238525390625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.110128173828125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.1061572265625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.10029541015625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.084449462890625, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.013509521484375, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.835779113769531, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.6428927612304687, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.5491241455078124, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.451672134399414, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.3749717330932616, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.31292724609375, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.24857027053833, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.181057958602905, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.105179104804993, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.042676591873169, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.96915203332901, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.8997532677650453, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.8155055689811705, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.7398657989501953, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.6639399313926697, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.5719908487796783, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.4919189471006393, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.4310227566957474, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.3413440757989883, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.2789182490110398, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.2459756469726562, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.1847099062800408, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.1540714365243911, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.1244371137022973, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.1357790961861611, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.1737404671311378, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.2541011807322502, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.317796640396118, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.358558194041252, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.4573291838169098, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.5962541514635087, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.66844577729702, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.7347627460956574, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.824448179602623, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020052106105722486, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020051040165126325, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020045612547546623, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020034028459340335, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.020019899690523743, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.01999191490933299, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.019945005709305404, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.01987551029305905, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.019831845145672558, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.020726740700192748, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02361620735377073, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.0280156478472054, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.029823047034442426, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.03171070284210145, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03365461495704949, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03496995825320482, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03601381234824658, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03674089341424405, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03765728213824332, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03829020135104656, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.038927665976807475, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03949777632951736, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.04049543406814337, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04135853143408894, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.042370813954621556, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04380162490531802, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04519102042540908, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04614121336489916, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.047224751599133016, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04774387525394559, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0483896678313613, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.049382059816271064, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.05000581029802561, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.05041517721489072, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.05105661179870367, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.05011919690296054, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05089829977601767, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.050291721727699044, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05088982494547963, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.050555468630045654, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.049848007299005986, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.049146657269448044, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.047646708581596614, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.04599886445328593, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1182029247283936, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1173901557922363, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1160223484039307, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1146247386932373, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1131391525268555, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1108882427215576, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.107682943344116, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.10223650932312, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.083827257156372, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.9943764209747314, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.804269552230835, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.626105785369873, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.547125816345215, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.4814021587371826, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.4462249279022217, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.424499750137329, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.4063282012939453, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.3917784690856934, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.386084794998169, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.394826889038086, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.409099817276001, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.4402031898498535, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.4814116954803467, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.5173490047454834, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.5478084087371826, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.6112797260284424, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.695671558380127, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.732940912246704, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.8440167903900146, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.914207696914673, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.971102237701416, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.1009521484375, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.2130322456359863, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.284153699874878, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.334737777709961, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.309499740600586, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.294719696044922, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.1915595531463623, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.112111806869507, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.0479185581207275, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.122124671936035, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.0036797523498535, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.959925413131714, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.81571626663208, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.07050572166851236, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.07198228128460686, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.0725359911406423, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.07179771133259505, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.07272056109265411, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.07290513104466592, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.07511997046880768, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.07936507936507936, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.08951642672572906, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.12532299741602068, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.1657438169066076, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.21040974529346623, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.23181985972683647, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.2513842746400886, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.2646733111849391, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2777777777777778, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.28202288667404946, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2823920265780731, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.28552971576227393, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2833148763381321, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2842377260981912, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2757475083056478, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.26411960132890366, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.25950535252860835, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.25987449243263194, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.25396825396825395, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2410483573274271, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.24732373569582872, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.23864894795127353, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.23421926910299004, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.22591362126245848, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2264673311184939, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.2235142118863049, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.21096345514950166, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.21114802510151348, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.20782576596530086, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.2081949058693245, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2131782945736434, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.23255813953488372, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.23329641934293097, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.21354743447766703, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.22812846068660023, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.22111480251015134, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.23883351790328536, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.012660592745103783, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.013568278051862878, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.015149721853957777, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.015254886247197076, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.016486114677827683, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.017541787958557986, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.019984239058002843, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.022733075048485263, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.03238986564839939, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.05469395484722676, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.08169657217843611, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.12540552281477635, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.1493254581176593, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.17246493291357912, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.188794165183095, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.20702743974258012, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.2143020915086157, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.21949947631351732, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.22328145478955944, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.2249021101337448, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.2304198251772985, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.22380359821202034, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.21427337334615415, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.21123588522829748, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.21627320989924673, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.20998162943840873, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.19945485769037805, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.2095096521957814, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.20706839853243775, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.19855990993069486, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.19294757633849183, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.19621837056108396, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.19194191746545144, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.18102346848564269, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.17873351729234777, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.17213128974762604, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.1775915581601655, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.1752777529653802, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.1965047657360696, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.20153190680153213, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.16672857573450864, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.18288170802714432, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.1767840933293625, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.18555903914310293, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 18, "lr_best": 0.00011399999999999999, "wd_best": 0.05, "train/loss_best": 2.105179104804993, "validation/loss_best": 2.386084794998169, "validation/acc_best": 0.28552971576227393, "validation/f1_best": 0.22328145478955944} +{"epoch": 14, "train/lr": 8.9081919031238e-05, "train/loss": 2.1107077270746233, "train/grad": 0.26164237171411514, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.1187841796875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.11751220703125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.115169677734375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.112886962890625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.110667724609375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.10736572265625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.103089599609375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.0964996337890627, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.0760845947265625, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.9814251708984374, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.788499450683594, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.6138243103027343, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.523920135498047, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.4271301651000976, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.350748062133789, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.290726890563965, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.2267752742767333, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.157083339691162, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.081267280578613, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.0170050048828125, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.941324429512024, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.8688836908340454, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.7839955282211304, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.7041163814067841, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.6214733004570008, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.5215694200992584, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.4316806572675704, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.3635738122463226, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.267725663781166, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.2036987790465354, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.153619165122509, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.0805691888928413, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.0347068521380425, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.0033449065685272, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.995991940498352, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.0435339519381523, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.1090287894010544, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.174592631161213, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.2072838628292084, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.305266946554184, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.4531478935480118, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.528616525530815, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.6247496855258943, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.7263463979959488, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.01962692376226187, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.019624465042725206, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.019618784291669727, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.019608404482714833, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019594633737578988, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.01956474045291543, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.019519083900377154, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.01945722287055105, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.01947345385327935, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.020792754534631967, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.024091409426182507, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02809014528058469, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.029790123514831065, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.03174473415128887, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03375524335540831, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03506234091706574, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03609187132678926, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03690607128664851, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.037927093086764216, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03855851037427783, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.039144977824762464, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03969368044286967, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.040682863425463435, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.041634132526814936, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04274500995874405, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04417968574911356, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04538854952901602, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04627225674688816, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04733389677479863, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04806531826034188, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.048505895268172027, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04899501295760274, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04899853991344571, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0488267214410007, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04899960296228528, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.049822348337620496, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05017653925344348, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.050274552684277295, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.050688600614666936, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.050329363606870174, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.049885785169899464, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04969787979498506, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.048172413408756255, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.04686723278835416, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1177072525024414, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1168053150177, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1153738498687744, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1139066219329834, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1123764514923096, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1099905967712402, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1065657138824463, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.100318193435669, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.0770556926727295, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.965799331665039, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.7702136039733887, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.6107053756713867, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.5365865230560303, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.4741363525390625, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.440145492553711, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.4197750091552734, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.4026575088500977, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.388918161392212, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.3855228424072266, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.3958306312561035, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4105582237243652, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.439667224884033, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.478447437286377, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.5168521404266357, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.5592598915100098, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.6344242095947266, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.73018741607666, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.788365602493286, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.9101626873016357, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.9675471782684326, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.0375030040740967, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.174006462097168, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.2890801429748535, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.3250951766967773, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.3126275539398193, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.3853917121887207, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.3548688888549805, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.2913055419921875, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.2849442958831787, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.2666876316070557, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.1528189182281494, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.0671637058258057, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.0155434608459473, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.9373226165771484, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.07032115171650055, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.07087486157253599, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.07142857142857142, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.07179771133259505, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.07124400147655961, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.07438169066076043, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.0753045404208195, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.07825765965300849, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.09560723514211886, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.1330749354005168, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.17349575489110372, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.21225544481358435, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.23440383905500184, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.2528608342561831, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.26559616094499816, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.27556293835363604, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2792543373938723, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.28202288667404946, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2833148763381321, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2809154669619786, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2857142857142857, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.27464008859357697, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.26651901070505724, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.26393503137689184, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.25802879291251385, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.24732373569582872, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.23938722775932078, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.23994093761535623, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2320044296788483, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.22739018087855298, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.22406792174234036, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2222222222222222, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.20653377630121816, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.20948689553340716, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.21077888519748986, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.20339608711701734, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.20782576596530086, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.20837947582133629, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.20616463639719454, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.2059800664451827, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.20893318567737174, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.2174234034699151, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.22277593207825766, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.2248062015503876, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.011823353081504265, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.012530567967717043, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.013427299708100006, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.014483512631266479, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.014246258402031524, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.01649229824724385, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.017827590638621806, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.01984163808657575, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.03462951637794559, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.057676064897914, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.08826872745798088, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.12949504501868955, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.15289063048465115, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.17460153006998172, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.1906054161931334, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.20415808734158403, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.2120796658195461, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.21836662767425677, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.2217716467381449, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.2228493530050233, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.23250172775185304, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.22498451757038818, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.21933606210634168, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.2177878274689791, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.21537345161724794, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.2049017535984684, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.20028158070331525, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.20505052836588403, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.20359494632742844, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.19773181208950677, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.19549903224232854, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.1924152056213373, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.18004342373429494, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.1841207119655699, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.18867884278746672, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.18430664346183803, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.18308042540579858, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.18246880627316778, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.1802430608001034, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.17574270806436254, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.1750680691558925, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1845155178292748, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.1890502124000839, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.18426128348676163, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 20, "lr_best": 0.000156, "wd_best": 0.05, "train/loss_best": 1.941324429512024, "validation/loss_best": 2.4105582237243652, "validation/acc_best": 0.2857142857142857, "validation/f1_best": 0.23250172775185304} +{"epoch": 15, "train/lr": 6.196799573888289e-05, "train/loss": 2.0623699021339417, "train/grad": 0.255707271695137, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.114317626953125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.112860107421875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.11058349609375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.108348388671875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.1059814453125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.102637939453125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.0981396484375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.090836181640625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.06646728515625, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.9555633544921873, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.7610043334960936, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.6008062744140625, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.5131804656982424, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.4170147705078127, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.340034637451172, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.2783086776733397, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.2118385314941404, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.139715232849121, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.0605857086181643, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.9954448652267456, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.9173767471313476, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.843957369327545, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.7566639184951782, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.6729705321788788, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.5881600588560105, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.4819288218021394, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.3841475397348404, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.309833767414093, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.204118599295616, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.12926200568676, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.0809862226247788, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.9978744024038315, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.9541755571961403, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.8992672878503799, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.8916837027668953, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.9179774904251099, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.9788296344876289, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.0362138590216636, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.0682758638262748, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.1581908518075943, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.2917377296090127, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.3696828448772431, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.4843209815025329, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.5954472076892854, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02004809054546058, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02004479051101953, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02003396665211767, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020018310509622095, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019997530370019376, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.01996086029801518, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.019900318635627627, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.019823378934524953, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.01986904738470912, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02145587037317455, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.024970239885151388, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.028712034337222577, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.03036634175106883, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.03236238483339548, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.034380629304796455, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.035612456630915404, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03655133148655296, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03732677871361375, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03828353518620133, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.0388489761389792, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03939697720110416, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03992489079013467, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.04086300214752555, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.041718948408961294, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04262425173074007, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04381164209917188, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.044759829100221395, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.045204846784472466, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04580748965963721, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.046233066134154795, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04617945546284318, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.046532288882881405, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0463598520681262, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04618164509534836, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04635168781504035, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.046483506131917235, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04718583948910236, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.047876720651984214, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.048348853886127474, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.048545104563236234, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04766347657889128, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04772341627627611, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04703835967928171, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.04546893084421754, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1176578998565674, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1167826652526855, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1153619289398193, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.113893747329712, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.112387180328369, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.109947443008423, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.106306791305542, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.099433422088623, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.0721242427825928, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.9461357593536377, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.7499213218688965, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.600109100341797, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.5284876823425293, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.4681456089019775, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.435559034347534, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.4166810512542725, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.401627779006958, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.3892810344696045, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.3856709003448486, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.3953959941864014, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4092259407043457, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.440114736557007, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.4826438426971436, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.522705316543579, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.565061569213867, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.639704942703247, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.7358577251434326, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.791199207305908, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.910677194595337, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.0082762241363525, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.0682578086853027, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.193361282348633, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.2887778282165527, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.406543016433716, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.443662166595459, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.4486021995544434, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.422764778137207, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.399599075317383, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.336956262588501, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.263540029525757, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.1403861045837402, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.1042237281799316, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.024111270904541, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.8574488162994385, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.07050572166851236, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.07087486157253599, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.07198228128460686, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.07216685123661867, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.07272056109265411, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.07216685123661867, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.07641196013289037, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.08102620893318568, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.09929863418235511, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.1362126245847176, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.1773717238833518, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.21613141380583242, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.23735695828719083, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.25544481358434845, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.2692875599852344, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2783314876338132, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.28183831672203763, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2866371354743448, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.28903654485049834, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.28497600590623845, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.28313030638612036, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.27593207825765964, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.266703580657069, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.26301218161683276, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2563676633444075, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.24732373569582872, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.23791066814322628, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.23772609819121446, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.23089700996677742, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.22369878183831673, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.2172388335179033, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.21631598375784422, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.2144702842377261, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.20376522702104097, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.19859726836471023, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.1984126984126984, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.20358065706902917, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2056109265411591, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2056109265411591, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.2072720561092654, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.2069029162052418, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.21631598375784422, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.22462163159837578, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.2334809892949428, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.011998328498235839, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.012730590166249006, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.014372558866654643, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.014708457186506138, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.01602283022985036, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.017190439607997248, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.01986759065181656, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.02359718885011812, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.040271155395105944, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.06043896070851001, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.09388174382006009, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.1324374975568741, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.15658285372593986, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.17750115912270656, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.1947584896409337, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.20712501436671107, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.21455340508323018, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.22366943066200815, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.22604240233887776, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.2262092997831496, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.22904576277178124, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.22347445009875044, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.2154352991512161, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.21532640014383322, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.21418974431443713, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.2092706336840291, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.20296353473126183, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.20547261590572266, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.2042114913830557, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.1981425255882445, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.19398589352207685, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.19632981606991398, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.19230970833059055, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.18963343971968324, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.18638706044857986, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.1833533674509489, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.1921291750907148, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.18635624967982334, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.18807130343488745, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.190323636634825, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.1911953621240827, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1955227907089366, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.19754880026310562, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.2032396495153305, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 18, "lr_best": 0.00011399999999999999, "wd_best": 0.05, "train/loss_best": 2.0605857086181643, "validation/loss_best": 2.3856709003448486, "validation/acc_best": 0.28903654485049834, "validation/f1_best": 0.22604240233887776} +{"epoch": 16, "train/lr": 3.870405029380728e-05, "train/loss": 2.0302335011959074, "train/grad": 0.2516653487831354, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.1219384765625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.1204052734375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.117901611328125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.11534423828125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.112906494140625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.109283447265625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.1043603515625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.0963677978515625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.068619384765625, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.9451751708984375, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.7502069091796875, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.6013893127441405, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.5173661041259767, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.421947364807129, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.344800338745117, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.283625450134277, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.2186140251159667, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.1486319732666015, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.0707629776000975, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.0051020622253417, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.9263051462173462, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.8502583861351014, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.75837308883667, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.6708712816238402, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.5802449017763138, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.4678641933202743, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.3655579406023026, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.2844035333395005, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.1721394604444504, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.0844831171631812, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.018777941763401, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.938985875248909, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.8719617390632629, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.8281629052758217, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.8125078877806664, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.8185496479272842, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.8746635490655899, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.9136216133832932, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.9314130166172981, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.0187089088559151, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.1483672270178795, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.2279115259647368, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.3309575009346009, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.452229496240616, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.01998905119020492, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.019983874857425688, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.019970783004537224, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.01995355490129441, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019931861911900342, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.019893062491901217, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.019833928965963425, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.019762243856675923, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.019859675695188345, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.021670275260694326, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02531343637034297, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.028884262852370737, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.030500683328136803, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.03251750584691763, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.034550429256632924, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03575482696294784, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03667994847521186, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.037465516915544866, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03839502801187336, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03892118478193879, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03946200029924512, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.04002544613555074, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.04087204413488507, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04162100926041603, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04249442845582962, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04362225908786058, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04440829399973154, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04481002682819962, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04540335109457374, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04550311230123043, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04528091888874769, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04520801480859518, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04459183199331165, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04441162411123514, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.044293812308460474, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04451654981821775, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04518173752352595, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0460279224999249, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.046112579945474866, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.04645923236384988, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04670979890972376, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04689876964315772, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.046276732217520475, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.04511319784447551, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1175904273986816, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1167078018188477, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.115269660949707, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.113795042037964, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1122498512268066, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1097776889801025, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.105971336364746, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.098684787750244, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.0686604976654053, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.933997392654419, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.7387824058532715, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.5948538780212402, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.5250542163848877, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.465136766433716, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.4325265884399414, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.4134812355041504, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.397984027862549, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.3862693309783936, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.384477376937866, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.397611618041992, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4156200885772705, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.4496419429779053, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.495922088623047, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.5377044677734375, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.580704689025879, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.6579062938690186, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.7586185932159424, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.814415693283081, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.9319963455200195, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.0260186195373535, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.0918610095977783, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.222985029220581, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.3628435134887695, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.4578006267547607, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.4902169704437256, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.5460407733917236, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.5083425045013428, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.4827880859375, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.4298555850982666, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.4694788455963135, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.3636796474456787, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.2552988529205322, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.178673505783081, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.0272529125213623, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.07087486157253599, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.07198228128460686, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.0710594315245478, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.07087486157253599, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.07216685123661867, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.07216685123661867, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.073827980804725, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.07918050941306755, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.09874492432631968, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.13953488372093023, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.17977113325950536, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.21834625322997417, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.23846437799926173, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.2558139534883721, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.2702104097452935, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2783314876338132, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2836840162421558, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.28552971576227393, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.28755998523440385, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.28165374677002586, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2823920265780731, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.27611664820967147, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.26504245108896274, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.26245847176079734, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2558139534883721, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.24584717607973422, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2382798080472499, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.23864894795127353, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.23163528977482467, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.2279438907345884, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.21908453303802142, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2203765227021041, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.21502399409376152, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.20431893687707642, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.20321151716500555, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.1967515688445921, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.20136581764488742, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.20468807678110004, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2129937246216316, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.20431893687707642, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.20431893687707642, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.21077888519748986, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.22056109265411591, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.21816168327796234, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.012687653445751804, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.013196183796051303, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.01395731389986162, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.014627716583886424, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.01554002760013849, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.01650006666210987, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.018295379314201044, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.02125648607729898, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.03827152696841581, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.06217019925575529, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.09576788667433962, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.134971135363736, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.15723068853667002, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.17909239010717284, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.19587571780863364, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.2081114826818824, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.21659506754912386, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.222654975499867, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.22507614770941023, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.22476055407773207, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.2286832137343103, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.22503247206003596, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.2160364067709275, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.21631079083426455, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.21626437666745799, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.20824319569457864, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.20221169949427442, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.20489526800007854, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.20407919702119517, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.19989812320443526, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.19277329062445134, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.19408344779921435, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.18812190649930197, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.18049509681539092, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.18088196217143873, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.17185190539039244, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.1777357950702959, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.17824803233210526, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.18818184244272554, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.1820727358484894, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.1759654613159908, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1822797055012788, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.1927028704077843, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.18566160478737556, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 18, "lr_best": 0.00011399999999999999, "wd_best": 0.05, "train/loss_best": 2.0707629776000975, "validation/loss_best": 2.384477376937866, "validation/acc_best": 0.28755998523440385, "validation/f1_best": 0.22507614770941023} +{"epoch": 17, "train/lr": 2.030750443206766e-05, "train/loss": 1.988931525349617, "train/grad": 0.24305898562073708, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.117889404296875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.11645263671875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.114097900390625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.1117919921875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.10946044921875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.1059375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.10119384765625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.0932293701171876, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.0642315673828127, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.93515869140625, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.737329406738281, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.5857749938964845, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.499796905517578, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.4024737167358396, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.325863170623779, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.2659463119506835, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.201320095062256, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.129591245651245, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.0502655601501463, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.9839008808135987, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.9031689500808715, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.8263419795036315, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.7348600244522094, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.6450201892852783, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.5528224205970764, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.4368154782056808, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.3304116356372833, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.2466760557889938, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.1294352555274962, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.0392052268981933, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.9746712905168533, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.8842720553278923, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.8161441919207573, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.756963593363762, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.7286672264337539, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.7348219472169876, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.7721185013651848, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.799075913131237, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.8051449012756348, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.8790521001815796, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.0022777757048607, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.080423313677311, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.1697094452381134, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.2984671515226365, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.019778201808221638, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.019771042536012827, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.019755276469513774, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.019735936322249473, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019712517275474965, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.019672892964445056, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.01961431395728141, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.019546105512417852, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.019671299923211337, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.021571614644490183, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.025219035679474474, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.028659024890512228, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.030278088990598917, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.032309640971943734, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03429834424518049, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.035447692582383755, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.036342890774831174, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.037118873884901406, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03804720584303141, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03856526853516698, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.039075742457062, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.039597877636551856, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.04042497370392084, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04116844393312931, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.042026952784508465, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04303159771487117, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04375584345310926, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.044128936380147935, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.044458386730402706, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04425576215609908, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04404158191755414, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04351946720853448, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04287673881277442, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.042014711312949655, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.041839095167815685, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.041609582267701624, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.041792618799954655, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04216345263645053, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.04207832219079137, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.04300073040649295, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.043862748872488734, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.043869885131716725, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04358918644487858, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.042467429284006354, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.117546796798706, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.116654396057129, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.115220069885254, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.113718271255493, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.112133502960205, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.109590530395508, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1057217121124268, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.0981435775756836, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.066575050354004, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.9275450706481934, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.7328743934631348, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.5916435718536377, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.5223629474639893, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.462939500808716, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.4300167560577393, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.4106686115264893, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.3944783210754395, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.3814189434051514, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.3789334297180176, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.3908677101135254, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.408205509185791, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.442436456680298, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.4887499809265137, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.531501531600952, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.5739316940307617, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.649596691131592, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.7473607063293457, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.805365800857544, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.9242331981658936, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.0152804851531982, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.080566167831421, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.224529266357422, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.332981586456299, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.4641757011413574, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.5140786170959473, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.562643527984619, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.5306620597839355, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.504539966583252, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.4633378982543945, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.505986213684082, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.39089035987854, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.302755355834961, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.2325780391693115, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.0686721801757812, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.07050572166851236, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.07272056109265411, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.0725359911406423, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.07198228128460686, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.07179771133259505, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.07272056109265411, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.07678110003691399, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.08084163898117387, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.1020671834625323, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.14156515319306018, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.17995570321151716, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.21834625322997417, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.23938722775932078, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.25839793281653745, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.2713178294573643, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2777777777777778, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2833148763381321, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.28552971576227393, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.28792912513842744, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.28349944629014395, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.28460686600221485, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.27759320782576596, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.26356589147286824, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2631967515688446, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.25507567368032485, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2469545957918051, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.24437061646363972, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2395717977113326, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.23052787006275377, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.23052787006275377, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.22314507198228128, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.22517534145441123, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.21613141380583242, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.21188630490956073, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.20062753783684018, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.19933554817275748, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.20062753783684018, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.20505721668512367, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.21280915466961978, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.2056109265411591, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.2081949058693245, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.21188630490956073, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2207456626061277, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.22240679217423404, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.012658546899989654, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.013904226827691246, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.015288050454500837, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.01522949966661673, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.016141226631874186, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.017376178448516764, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.019814500246198016, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.02293601108839084, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.04226071980007729, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.0632756216514762, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.09668198870213328, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.13567112358784997, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.15876066341285047, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.18174726573153333, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.1981242004955981, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.20896625329649968, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.21853889719282904, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.22395121590401237, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.22763360739742033, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.22686078345076457, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.23048649639455396, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.22704723147641862, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.2160440310871599, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.21874974741695027, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.21555672415783547, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.2092781198465635, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.21094184788133463, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.20736508329808978, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.2036140131638593, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.2024823937716639, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.1961053360963013, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.198718338679332, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.18906768537612959, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.189315441581744, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.17971170787035587, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.17556903994017323, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.18052546739150208, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.18056127477363001, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.18833385575887288, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.18345381021031706, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.1807427151428198, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.18324402055086852, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.19054869342414524, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.19242124245190992, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 18, "lr_best": 0.00011399999999999999, "wd_best": 0.05, "train/loss_best": 2.0502655601501463, "validation/loss_best": 2.3789334297180176, "validation/acc_best": 0.28792912513842744, "validation/f1_best": 0.22763360739742033} +{"epoch": 18, "train/lr": 7.5829098008590064e-06, "train/loss": 1.9681032633781432, "train/grad": 0.23871886104345322, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.119656982421875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.11822509765625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.115712890625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.113272705078125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.110882568359375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.1073486328125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.10243896484375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.0943536376953125, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.064527587890625, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.9340313720703124, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.7415425109863283, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.594451904296875, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.508108139038086, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.4083839797973634, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.329170207977295, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.267038555145264, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.1994369220733643, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.1254183864593506, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.044259395599365, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.977146668434143, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.8944234180450439, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.8156131386756897, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.721672821044922, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.6295124900341034, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.5340100169181823, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.4169593155384064, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.3089368510246278, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.2230816343426705, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.105758016705513, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.0173050564527513, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.9422226166725158, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.8535575729608536, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.784498599767685, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.7247438314557075, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.6956477665901184, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.6935472209751606, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.7202616454660893, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.7475434681773185, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.7396584162116051, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.8014478987455368, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.90752406924963, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.9807187613844871, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.065502493083477, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.1781329873204232, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.019883163506165147, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.019874487705528736, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01985871739219874, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.01983942531514913, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019815877438522877, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.019776468998752534, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.019717743289656937, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.019646564456634222, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.0197824087459594, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.021709850849583746, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.025359512623399496, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.028763134898617863, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.030386295542120932, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.03242856306023896, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03441308908164501, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.035559040727093816, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.036430744249373676, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03721392251551151, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03815280146896839, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03867444476112723, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0391657111980021, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03965292166918516, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.040459813345223666, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.041162197683006524, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0419180466234684, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04277922937646508, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04337431205436587, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.043609442580491306, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04377501267939806, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04365333104506135, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04304061479866505, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0425115954503417, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04173433214426041, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04077639453113079, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.040068362951278684, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.039878131449222566, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04023084176704288, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04050570817664265, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.040089405123144385, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0407775610126555, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.041317744627594945, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.041487381253391506, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04171232733875513, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.04111634058877826, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1175451278686523, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.116650342941284, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.115196943283081, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1136820316314697, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1121137142181396, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1095480918884277, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.105648994445801, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.0979936122894287, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.065896987915039, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.925391912460327, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.7311501502990723, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.5910074710845947, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.5222628116607666, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.4635143280029297, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.4314870834350586, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.4130289554595947, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.398059606552124, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.3858048915863037, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.384521484375, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.3968563079833984, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4147002696990967, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.448831558227539, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.4962713718414307, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.540318727493286, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.5831046104431152, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.659996509552002, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.759192705154419, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.8175456523895264, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.9403650760650635, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.0353949069976807, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.102616548538208, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.2378909587860107, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.35707688331604, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.4847638607025146, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.5370631217956543, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.5915849208831787, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.5601441860198975, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.533367156982422, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.502692222595215, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.513491630554199, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.425438165664673, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.344144344329834, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.2910993099212646, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1449763774871826, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.07087486157253599, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.0725359911406423, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.07235142118863049, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.07235142118863049, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.0710594315245478, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.07290513104466592, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.07604282022886674, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.08102620893318568, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.10317460317460317, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.14174972314507198, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.1806939830195644, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.21963824289405684, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.23938722775932078, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.2571059431524548, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.271686969361388, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2777777777777778, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2822074566260613, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.28349944629014395, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2857142857142857, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2811000369139904, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2833148763381321, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.27648578811369506, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2619047619047619, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.25858250276854927, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2526762643041713, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.24750830564784054, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.24031007751937986, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.23809523809523808, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.23052787006275377, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.22978959025470652, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.2207456626061277, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.22111480251015134, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.21354743447766703, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.2087486157253599, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.19896640826873385, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.1967515688445921, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.1984126984126984, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.20339608711701734, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2131782945736434, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.20524178663713546, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.20191952750092285, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.2087486157253599, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.21908453303802142, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.21705426356589147, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.012864583057637385, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.013828944144438214, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.014915333387866124, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.015443633322946515, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.0155547924235166, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.017791367851511142, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.019433187858884732, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.02310140827508848, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.04389554206983695, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.06324519258028005, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.09701527743151912, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.13640106478134958, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.15861798341922623, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.18084667579420066, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.1993207415396214, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.20835157820355973, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.21651603802363617, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.22073783993578436, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.2245021047693001, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.22304164226108714, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.22921226661074345, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.22506336128621285, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.21266639766452608, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.21260564594323542, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.21111950287758444, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.20765056963318562, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.20443744905092567, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.205385720909147, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.2052978688437016, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.20224904782085742, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.19444525028797596, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.19691074200858535, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.1881624637836372, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.18786942930797612, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.17848956880044162, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.17524982673622636, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.17753241717815058, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.1789308012070304, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.18970626732260118, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.18426727971397094, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.17766619411776854, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.18264668050077446, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.19278218000270742, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.18756042189263622, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 18, "lr_best": 0.00011399999999999999, "wd_best": 0.05, "train/loss_best": 2.044259395599365, "validation/loss_best": 2.384521484375, "validation/acc_best": 0.2857142857142857, "validation/f1_best": 0.2245021047693001} +{"epoch": 19, "train/lr": 1.0867618231465446e-06, "train/loss": 1.9649534034729004, "train/grad": 0.23497634209692478, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.1204052734375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.119027099609375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.116612548828125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.11417724609375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.111768798828125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.108162841796875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.10326171875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.0949298095703126, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.0644061279296877, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.932214660644531, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.7378248596191406, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.593056945800781, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.5091303253173827, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.412536163330078, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.3356155014038085, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.2754562377929686, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.2096047019958496, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.1364004039764404, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.0546427249908445, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.986273536682129, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.9030491256713866, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.8244817399978637, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.732757363319397, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.6428972971439362, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.5497648859024047, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.4308852851390839, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.3229429346323014, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.236254829764366, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.1203070855140687, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.0155314379930496, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.94190588504076, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.8544394752383232, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.7730726003646851, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.7132160133123397, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.6840112470090389, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.6765658949315548, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.699825764298439, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.7242683905363083, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.7203328385949135, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.7743941530585289, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.8716524216532707, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.9368298548460007, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.0135419726371766, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.1249080243706704, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.01951543352100998, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.019507184815593062, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.019491075747646393, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.019470322304405274, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019446462001651524, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.019404586250893772, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.019344662395305932, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.019275563536211848, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.01941684482153505, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02135071593336761, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.024968399088829755, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.028314337115734816, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.029920058706775308, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.031967855710536244, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03396090537309646, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.035126397982239724, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03604038276709616, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.036865667756646875, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03782956077717245, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03837020067498088, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03893542520701885, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0394478084333241, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.040222016945481304, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0408803478255868, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.041635639350861314, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04246957348659634, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04303150998428464, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0434300335124135, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.043672602903097865, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04330996679142118, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04279551615938544, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04207675211131573, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04106427438557148, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04011203840374947, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.039316077046096326, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.038904063627123835, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.03909907026216388, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.03928250944241882, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.03890502655878663, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.03952105410397053, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.03991089088842273, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04007965762168169, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.040372025370597836, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.039813213236629966, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1175177097320557, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1166446208953857, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1151835918426514, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1136856079101562, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.112081289291382, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.109525680541992, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1056244373321533, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.097966194152832, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.0657639503479004, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.9250073432922363, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.7308061122894287, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.5908358097076416, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.522023916244507, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.4633657932281494, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.431205987930298, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.41260027885437, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.397461414337158, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.3850555419921875, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.3837547302246094, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.396291971206665, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.414348840713501, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.448927164077759, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.496887683868408, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.5408453941345215, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.5840342044830322, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.66160249710083, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.7614266872406006, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.8194589614868164, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.942411422729492, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.035668134689331, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.1048245429992676, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.2399864196777344, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.3564398288726807, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.4805736541748047, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.5310604572296143, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.5837578773498535, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.5561017990112305, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.531033754348755, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.501091480255127, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.522753953933716, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.430795669555664, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.3463704586029053, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.292611837387085, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.144590377807617, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.07069029162052418, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.0725359911406423, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.07235142118863049, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.07272056109265411, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.07161314138058324, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.07272056109265411, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.07641196013289037, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.08139534883720931, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.10317460317460317, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.14156515319306018, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.1806939830195644, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.21963824289405684, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.23938722775932078, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.2572905131044666, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.27113325950535255, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2781469176818014, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2822074566260613, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2833148763381321, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2870062753783684, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2827611664820967, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.28460686600221485, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.27593207825765964, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2617201919527501, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2591362126245847, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.25249169435215946, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.24621631598375784, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.24049464747139165, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.23938722775932078, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.22997416020671835, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.22757475083056478, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.22185308231819859, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2235142118863049, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.21520856404577335, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.20837947582133629, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.20062753783684018, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.19785898855666298, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.2024732373569583, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.20321151716500555, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.21262458471760798, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.2054263565891473, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.2024732373569583, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.21133259505352528, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.21631598375784422, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.21650055370985605, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.012830729261650816, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.013857853439445489, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.014888115405963263, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.015546399796151739, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.015680843269394566, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.01753387318105411, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.01960587882131607, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.023460607827784048, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.04349544583438832, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.06315808351243844, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.09702023252500058, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.1365296014674545, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.15844929012365003, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.1810947025989137, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.1988743579420451, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.20881824483707886, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.21673876525987026, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.2206667603766308, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.2256492492538736, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.2250440348038941, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.23047891794255992, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.2243862099922794, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.2123000225632179, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.2129351572753675, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.21059473731002398, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.2057505869251389, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.20369323358780866, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.20656692604299523, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.20389268889036719, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.19938059071231842, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.19490411577396938, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.19835677528067838, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.18998878678479877, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.18711139910730598, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.1799965420610954, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.1757854738570033, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.1816813456642463, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.17822141530446978, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.18907438630180426, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.18476862176072031, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.17780632512509542, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1834869351846302, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.19030788104692042, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.18761456018801428, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 18, "lr_best": 0.00011399999999999999, "wd_best": 0.05, "train/loss_best": 2.0546427249908445, "validation/loss_best": 2.3837547302246094, "validation/acc_best": 0.2870062753783684, "validation/f1_best": 0.2256492492538736} diff --git a/data_scaling/n1600_1/eval_v2/ppmi_dx__patch__logistic/config.yaml b/data_scaling/n1600_1/eval_v2/ppmi_dx__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..54f0eab8bf03df699e2ddad03746888289dc995d --- /dev/null +++ b/data_scaling/n1600_1/eval_v2/ppmi_dx__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n1600_1; eval v2 (ppmi_dx patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n1600_1/eval_v2/ppmi_dx__patch__logistic +model: flat_mae +representation: patch +dataset: ppmi_dx +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/ppmi_dx__patch__logistic +remote_dir: null diff --git a/data_scaling/n1600_1/eval_v2/ppmi_dx__patch__logistic/eval_table.csv b/data_scaling/n1600_1/eval_v2/ppmi_dx__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..090396a9fcfa99193c818571f8eab3723ab18037 --- /dev/null +++ b/data_scaling/n1600_1/eval_v2/ppmi_dx__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,ppmi_dx,,0.005994842503189409,train,0.7437722419928826,0.017238214864704983,0.705759162303665,0.0210815547529769,0.6981232885861217,0.019484812066777074 +flat_mae,patch,logistic,ppmi_dx,,0.005994842503189409,test,0.66,0.035259404419246784,0.5687468290208015,0.049124137780883016,0.5795795795795795,0.039470840732104176 +flat_mae,patch,logistic,ppmi_dx,1,0.3593813663804626,train,0.9323843416370107,0.010409687006177028,0.9279107762520085,0.011198074378860792,0.924213230571612,0.01179043854322467 +flat_mae,patch,logistic,ppmi_dx,1,0.3593813663804626,test,0.61,0.04657585211244127,0.5793334052421529,0.05076462347024729,0.5785229202037352,0.050024494815787676 +flat_mae,patch,logistic,ppmi_dx,2,0.3593813663804626,train,0.9306049822064056,0.010657530608719918,0.9255085220996822,0.011654457854008525,0.9192892314279597,0.012635988469985314 +flat_mae,patch,logistic,ppmi_dx,2,0.3593813663804626,test,0.65,0.046324939287601875,0.6338529134846741,0.04843507736701297,0.6362478777589134,0.04923399533444366 +flat_mae,patch,logistic,ppmi_dx,3,0.046415888336127774,train,0.8131672597864769,0.015532556724024073,0.7936099889827396,0.018112250403911057,0.783906015842432,0.018177310848532326 +flat_mae,patch,logistic,ppmi_dx,3,0.046415888336127774,test,0.65,0.049084950850540744,0.630450849963045,0.05155355772887281,0.6311544991511036,0.051790678743298575 +flat_mae,patch,logistic,ppmi_dx,4,0.005994842503189409,train,0.7348754448398577,0.016811630093385613,0.6950934877200649,0.020784012970767686,0.6881422607578677,0.019135609607927668 +flat_mae,patch,logistic,ppmi_dx,4,0.005994842503189409,test,0.71,0.03985531834021653,0.6745595331612613,0.04647806575511383,0.6693548387096775,0.044073003065889876 +flat_mae,patch,logistic,ppmi_dx,5,0.3593813663804626,train,0.9199288256227758,0.01180007854097942,0.914218554435094,0.012800892614655546,0.9088792549775209,0.013439308420039014 +flat_mae,patch,logistic,ppmi_dx,5,0.3593813663804626,test,0.61,0.04910776313374495,0.5793334052421529,0.05246830153419495,0.5785229202037352,0.05154971096762268 +flat_mae,patch,logistic,ppmi_dx,6,0.046415888336127774,train,0.8149466192170819,0.016120542364355995,0.7937934830160456,0.01877422096174009,0.7827419182187968,0.01845589674294388 +flat_mae,patch,logistic,ppmi_dx,6,0.046415888336127774,test,0.66,0.043933927664164055,0.609375,0.05325723969101963,0.6086587436332768,0.04859251463672096 +flat_mae,patch,logistic,ppmi_dx,7,0.005994842503189409,train,0.7295373665480427,0.01662902314584567,0.6884927870239363,0.020502533791182588,0.6820675444230357,0.018836863945668962 +flat_mae,patch,logistic,ppmi_dx,7,0.005994842503189409,test,0.63,0.04629589614641885,0.6009060511271707,0.049341284629581,0.5997453310696095,0.04858673060721092 +flat_mae,patch,logistic,ppmi_dx,8,0.046415888336127774,train,0.8131672597864769,0.015053464913152587,0.7925944150572026,0.01755359854001118,0.782166559623207,0.01745639146532642 +flat_mae,patch,logistic,ppmi_dx,8,0.046415888336127774,test,0.63,0.043199981481477516,0.5783475783475784,0.05055903971893245,0.5793718166383701,0.04628881605684955 +flat_mae,patch,logistic,ppmi_dx,9,0.046415888336127774,train,0.8078291814946619,0.015899288539000814,0.7869328952356881,0.01845072187435738,0.7769615713979876,0.018339837916690558 +flat_mae,patch,logistic,ppmi_dx,9,0.046415888336127774,test,0.7,0.0395913930040356,0.6553308823529411,0.048553197702691085,0.6511035653650254,0.04444857808079524 +flat_mae,patch,logistic,ppmi_dx,10,0.046415888336127774,train,0.8131672597864769,0.015550379847296993,0.7920740795551844,0.018017070255408926,0.7812968315135945,0.01781476026992815 +flat_mae,patch,logistic,ppmi_dx,10,0.046415888336127774,test,0.63,0.04253516192516493,0.5847828526540231,0.04814450581702176,0.5844651952461799,0.04529359556340037 +flat_mae,patch,logistic,ppmi_dx,11,0.005994842503189409,train,0.7330960854092526,0.01726522422320705,0.6878332740846071,0.022039131087563425,0.6814788053949904,0.019810494478883056 +flat_mae,patch,logistic,ppmi_dx,11,0.005994842503189409,test,0.7,0.040911900469178884,0.6553308823529411,0.05027566723226871,0.6511035653650254,0.04615250303031808 +flat_mae,patch,logistic,ppmi_dx,12,0.046415888336127774,train,0.8167259786476868,0.015871844629152792,0.79897553437169,0.018179645142737257,0.7902751016912867,0.018367188807754776 +flat_mae,patch,logistic,ppmi_dx,12,0.046415888336127774,test,0.66,0.03932333149670816,0.5952380952380952,0.04951542241207683,0.5984719864176571,0.04312967331998438 +flat_mae,patch,logistic,ppmi_dx,13,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,13,21.54434690031882,test,0.61,0.04793069580133382,0.5741893219783819,0.05184831910391855,0.5734295415959253,0.05026799577660129 +flat_mae,patch,logistic,ppmi_dx,14,0.046415888336127774,train,0.8202846975088968,0.015627963049049245,0.8004955801978806,0.01825728228243039,0.7896863626632413,0.018168440584852046 +flat_mae,patch,logistic,ppmi_dx,14,0.046415888336127774,test,0.63,0.04402055883334514,0.5713127099988413,0.052974682897818676,0.5742784380305602,0.04749155712595072 +flat_mae,patch,logistic,ppmi_dx,15,0.046415888336127774,train,0.798932384341637,0.01568459627152084,0.7784182300952887,0.017961794259995386,0.769736137871976,0.01777114220411203 +flat_mae,patch,logistic,ppmi_dx,15,0.046415888336127774,test,0.65,0.043483601506774945,0.6072270227808326,0.04960612012568691,0.6056876061120543,0.046483951635932254 +flat_mae,patch,logistic,ppmi_dx,16,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,16,1291.5496650148827,test,0.61,0.04822603446272563,0.5882166613873931,0.05022631046021138,0.5887096774193548,0.05062858093510596 +flat_mae,patch,logistic,ppmi_dx,17,2.782559402207126,train,0.99644128113879,0.0024731534296167142,0.9962334964144495,0.0026242208293934086,0.9953703703703703,0.0032173894153810006 +flat_mae,patch,logistic,ppmi_dx,17,2.782559402207126,test,0.56,0.04818213776909447,0.5280995280995281,0.05004368302174997,0.5280135823429541,0.04951026787993568 +flat_mae,patch,logistic,ppmi_dx,18,0.005994842503189409,train,0.7508896797153025,0.01636139757497044,0.7163948844384849,0.019675055809721627,0.7081058659815885,0.018446841523655805 +flat_mae,patch,logistic,ppmi_dx,18,0.005994842503189409,test,0.58,0.04675637282766917,0.525101763907734,0.051617500646467036,0.5288624787775891,0.0481300689917646 +flat_mae,patch,logistic,ppmi_dx,19,0.3593813663804626,train,0.9163701067615658,0.01236597197767312,0.9105807478122514,0.013380531806834146,0.9059890815671163,0.014038872492093683 +flat_mae,patch,logistic,ppmi_dx,19,0.3593813663804626,test,0.56,0.04821652828647039,0.5452666391070691,0.04931207792472886,0.5483870967741935,0.05069699630301417 +flat_mae,patch,logistic,ppmi_dx,20,0.046415888336127774,train,0.8042704626334519,0.016228053003499637,0.7835191618108471,0.01880971409659815,0.774071397987583,0.018735607751424098 +flat_mae,patch,logistic,ppmi_dx,20,0.046415888336127774,test,0.62,0.04198134823942652,0.5634191176470589,0.05006100586719623,0.566213921901528,0.045389328434490776 +flat_mae,patch,logistic,ppmi_dx,21,0.046415888336127774,train,0.797153024911032,0.01609966104067253,0.774535472972973,0.018657527369223704,0.7648121387283238,0.01829584924820583 +flat_mae,patch,logistic,ppmi_dx,21,0.046415888336127774,test,0.75,0.03862783970143814,0.7194478734148805,0.046020339778095797,0.7117996604414262,0.04388800191613831 +flat_mae,patch,logistic,ppmi_dx,22,0.3593813663804626,train,0.9270462633451957,0.01110616805008496,0.9219959714957937,0.012002569908839918,0.9172687861271676,0.012661216101359611 +flat_mae,patch,logistic,ppmi_dx,22,0.3593813663804626,test,0.64,0.045886224512374084,0.609375,0.04960413122133008,0.6078098471986417,0.048644847530646664 +flat_mae,patch,logistic,ppmi_dx,23,0.3593813663804626,train,0.9234875444839857,0.011254735202343382,0.918190896934613,0.012209825428700758,0.9135088846071505,0.012958374902863506 +flat_mae,patch,logistic,ppmi_dx,23,0.3593813663804626,test,0.59,0.04746301296799435,0.5577607593571352,0.051829222853880315,0.5573005093378608,0.05072215799779092 +flat_mae,patch,logistic,ppmi_dx,24,0.005994842503189409,train,0.7455516014234875,0.01618879750556639,0.7073716023085186,0.020325785822282335,0.699421965317919,0.018816665474231102 +flat_mae,patch,logistic,ppmi_dx,24,0.005994842503189409,test,0.67,0.04506384359994162,0.6239316239316239,0.05300692442838085,0.6218166383701189,0.049060753620487146 +flat_mae,patch,logistic,ppmi_dx,25,0.046415888336127774,train,0.8327402135231317,0.015029200774694912,0.8167526917526917,0.01694708370918722,0.80762952258617,0.01708489560551875 +flat_mae,patch,logistic,ppmi_dx,25,0.046415888336127774,test,0.59,0.03807237318581545,0.5071523019593701,0.049415883309391555,0.5216468590831919,0.041352327178294984 +flat_mae,patch,logistic,ppmi_dx,26,2.782559402207126,train,0.998220640569395,0.0018878793925199845,0.9981184064710746,0.0020004410137219944,0.9976851851851851,0.0024559912467505304 +flat_mae,patch,logistic,ppmi_dx,26,2.782559402207126,test,0.65,0.04559133250959003,0.6224786970121885,0.04928858107109426,0.6209677419354839,0.048476985163827334 +flat_mae,patch,logistic,ppmi_dx,27,0.3593813663804626,train,0.9288256227758007,0.010895274537924547,0.9239718614718615,0.01176756220526033,0.9195836009419824,0.01250185669419581 +flat_mae,patch,logistic,ppmi_dx,27,0.3593813663804626,test,0.56,0.04769318609612907,0.5331069609507639,0.049626802306315526,0.5331069609507639,0.04945408200610041 +flat_mae,patch,logistic,ppmi_dx,28,0.3593813663804626,train,0.9341637010676157,0.010145863784979793,0.9297412176522065,0.010951037728064102,0.9256583172768144,0.011725941224950678 +flat_mae,patch,logistic,ppmi_dx,28,0.3593813663804626,test,0.68,0.04409362312171681,0.6604414261460102,0.0467052166755184,0.6604414261460102,0.04650774182969822 +flat_mae,patch,logistic,ppmi_dx,29,0.046415888336127774,train,0.800711743772242,0.015966192864873767,0.7801159784811011,0.018462318964453304,0.7711812245771783,0.018321552710968966 +flat_mae,patch,logistic,ppmi_dx,29,0.046415888336127774,test,0.69,0.043067997399461225,0.6467236467236468,0.05087853709445476,0.6430390492359932,0.04712506738652775 +flat_mae,patch,logistic,ppmi_dx,30,0.005994842503189409,train,0.7313167259786477,0.01657739523571188,0.6862482669377946,0.020699046935196325,0.680033718689788,0.018682651503021553 +flat_mae,patch,logistic,ppmi_dx,30,0.005994842503189409,test,0.68,0.039638244158892805,0.6114618746964546,0.052507065459384675,0.6146010186757216,0.04464248726395057 +flat_mae,patch,logistic,ppmi_dx,31,2.782559402207126,train,0.99644128113879,0.002250663907769037,0.9962334964144495,0.002387755764328755,0.9953703703703703,0.0029279470281624916 +flat_mae,patch,logistic,ppmi_dx,31,2.782559402207126,test,0.73,0.04330267428231195,0.7052079921388797,0.04811900056834379,0.7007640067911716,0.04728903020090803 +flat_mae,patch,logistic,ppmi_dx,32,0.046415888336127774,train,0.8113879003558719,0.01609324282531149,0.7918954796338993,0.018560923308352247,0.7824609291372298,0.01855443168350566 +flat_mae,patch,logistic,ppmi_dx,32,0.046415888336127774,test,0.65,0.047498311548938243,0.6178622120318812,0.0521888010466694,0.615874363327674,0.05073711253333573 +flat_mae,patch,logistic,ppmi_dx,33,2.782559402207126,train,0.994661921708185,0.0030382269189876535,0.9943452231222015,0.0032301207870779054,0.9930555555555556,0.003952508167757085 +flat_mae,patch,logistic,ppmi_dx,33,2.782559402207126,test,0.61,0.04678331326445359,0.5953937130407718,0.047778236064547605,0.5988964346349746,0.048412857646781625 +flat_mae,patch,logistic,ppmi_dx,34,0.005994842503189409,train,0.7259786476868327,0.017325225033143352,0.685325770796975,0.021143670891936896,0.6791773710126312,0.0195266059652476 +flat_mae,patch,logistic,ppmi_dx,34,0.005994842503189409,test,0.7,0.0414789778080415,0.66078697421981,0.04907272849384992,0.6561969439728353,0.0462896824089861 +flat_mae,patch,logistic,ppmi_dx,35,2.782559402207126,train,0.998220640569395,0.0016835561371556851,0.9981184064710746,0.0017832672390809953,0.9976851851851851,0.002190181826577538 +flat_mae,patch,logistic,ppmi_dx,35,2.782559402207126,test,0.62,0.04939607676728994,0.6072757337742869,0.05035850410573864,0.6120543293718166,0.051291506097500404 +flat_mae,patch,logistic,ppmi_dx,36,0.046415888336127774,train,0.8220640569395018,0.014741378094079691,0.8036749807866974,0.016877357745376294,0.7937406336972811,0.016865797755317172 +flat_mae,patch,logistic,ppmi_dx,36,0.046415888336127774,test,0.67,0.0432086796836006,0.6239316239316239,0.05098854173256505,0.6218166383701189,0.04703630933881832 +flat_mae,patch,logistic,ppmi_dx,37,0.005994842503189409,train,0.7402135231316725,0.01667046857427181,0.6980540508714084,0.020716918213062003,0.6907380646542496,0.018819494745476364 +flat_mae,patch,logistic,ppmi_dx,37,0.005994842503189409,test,0.65,0.044008617337971435,0.6072270227808326,0.05086361558435065,0.6056876061120543,0.04782298598452175 +flat_mae,patch,logistic,ppmi_dx,38,0.3593813663804626,train,0.9270462633451957,0.011147240776891023,0.9221456736146072,0.012047109815631677,0.9181385142367802,0.012855508300204536 +flat_mae,patch,logistic,ppmi_dx,38,0.3593813663804626,test,0.51,0.05031262267065791,0.4873940788785438,0.05166238234843808,0.4876910016977929,0.05232252919018952 +flat_mae,patch,logistic,ppmi_dx,39,0.3593813663804626,train,0.9270462633451957,0.010392786806416605,0.9218435718186413,0.0112938012682791,0.9163990580175552,0.012113308149328378 +flat_mae,patch,logistic,ppmi_dx,39,0.3593813663804626,test,0.71,0.04497811023153373,0.6871966346672419,0.04877804965365929,0.684634974533107,0.04814916433329234 +flat_mae,patch,logistic,ppmi_dx,40,2.782559402207126,train,0.994661921708185,0.0030669137425523504,0.9943452231222015,0.0032602362347938893,0.9930555555555556,0.003989827600264863 +flat_mae,patch,logistic,ppmi_dx,40,2.782559402207126,test,0.6,0.048248423808452026,0.586606035551881,0.04935212502174878,0.5908319185059423,0.05051606991703183 +flat_mae,patch,logistic,ppmi_dx,41,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,41,166.81005372000556,test,0.6,0.04624499972970051,0.5833333333333333,0.04752848211787758,0.5857385398981324,0.048317275124008185 +flat_mae,patch,logistic,ppmi_dx,42,0.046415888336127774,train,0.8096085409252669,0.01599440607117329,0.7891655184261919,0.01850756382040202,0.7792763862128024,0.018385616113408432 +flat_mae,patch,logistic,ppmi_dx,42,0.046415888336127774,test,0.69,0.03625383290081201,0.627359057579036,0.04801521268524075,0.6277589134125636,0.04120893235277005 +flat_mae,patch,logistic,ppmi_dx,43,0.005994842503189409,train,0.7330960854092526,0.01603693764423095,0.6888150609080841,0.02033586674307677,0.6823485335046029,0.01842282888362388 +flat_mae,patch,logistic,ppmi_dx,43,0.005994842503189409,test,0.66,0.04172682111064776,0.609375,0.05053998134630239,0.6086587436332768,0.04600012741731729 +flat_mae,patch,logistic,ppmi_dx,44,0.3593813663804626,train,0.9234875444839857,0.011406790382618303,0.9178683705201625,0.012487149122671865,0.9117694283879255,0.01340001341290233 +flat_mae,patch,logistic,ppmi_dx,44,0.3593813663804626,test,0.6,0.04467072419381624,0.5659722222222222,0.04938129915528078,0.565365025466893,0.04819709453599131 +flat_mae,patch,logistic,ppmi_dx,45,0.046415888336127774,train,0.8202846975088968,0.014743878324589607,0.801472465592921,0.017171873420101177,0.7914258188824663,0.017283774028087683 +flat_mae,patch,logistic,ppmi_dx,45,0.046415888336127774,test,0.7,0.039960860851588266,0.6428571428571428,0.052097715650376505,0.6409168081494058,0.0457763088294024 +flat_mae,patch,logistic,ppmi_dx,46,0.3593813663804626,train,0.9163701067615658,0.012000536117674576,0.9102282189406427,0.013055624549286073,0.9042496253478913,0.01372294074397409 +flat_mae,patch,logistic,ppmi_dx,46,0.3593813663804626,test,0.59,0.04448066546264792,0.5464100011063171,0.048726322779253324,0.5471137521222411,0.04627956417959851 +flat_mae,patch,logistic,ppmi_dx,47,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,47,166.81005372000556,test,0.64,0.04864606458902919,0.6216897856242118,0.05125811738314717,0.6230899830220713,0.051853612662766856 +flat_mae,patch,logistic,ppmi_dx,48,0.005994842503189409,train,0.7277580071174378,0.01617157150457233,0.681083306443537,0.02054093420458604,0.6754040890601585,0.018435000981067644 +flat_mae,patch,logistic,ppmi_dx,48,0.005994842503189409,test,0.73,0.040806783749763965,0.7012943909724527,0.046831541148704754,0.6956706281833617,0.0453410560463028 +flat_mae,patch,logistic,ppmi_dx,49,0.3593813663804626,train,0.9217081850533808,0.011156228887524832,0.9158739878886848,0.012203198791308871,0.9094546135731107,0.013089811948528658 +flat_mae,patch,logistic,ppmi_dx,49,0.3593813663804626,test,0.68,0.04522273322124614,0.6567996567996568,0.0490402176656509,0.6553480475382003,0.04860585591548598 +flat_mae,patch,logistic,ppmi_dx,50,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,50,1291.5496650148827,test,0.57,0.04726373239599259,0.5413333333333333,0.049196590634802945,0.5411714770797962,0.048898238646628005 +flat_mae,patch,logistic,ppmi_dx,51,0.3593813663804626,train,0.9270462633451957,0.011363257924253373,0.9218435718186413,0.012341413419394862,0.9163990580175552,0.013107992117573549 +flat_mae,patch,logistic,ppmi_dx,51,0.3593813663804626,test,0.62,0.04749515764791186,0.5876736111111112,0.0515213030503951,0.5865874363327674,0.05026524302414357 +flat_mae,patch,logistic,ppmi_dx,52,0.3593813663804626,train,0.9234875444839857,0.01137994105748812,0.9185021060760885,0.012229473446315685,0.9152483408263755,0.012811465020240899 +flat_mae,patch,logistic,ppmi_dx,52,0.3593813663804626,test,0.57,0.04954372210482373,0.5501621508525996,0.05124321190346506,0.551358234295416,0.05189666940914281 +flat_mae,patch,logistic,ppmi_dx,53,0.3593813663804626,train,0.9234875444839857,0.010829907715599663,0.9183479015958076,0.011691624969273388,0.914378612716763,0.012386902690774571 +flat_mae,patch,logistic,ppmi_dx,53,0.3593813663804626,test,0.66,0.04408698674212153,0.6263736263736264,0.048899709396922904,0.6239388794567062,0.047114459652987405 +flat_mae,patch,logistic,ppmi_dx,54,0.046415888336127774,train,0.8149466192170819,0.01511884966699185,0.7953272075302555,0.017334436925392202,0.7853511025476343,0.017216995958282962 +flat_mae,patch,logistic,ppmi_dx,54,0.046415888336127774,test,0.57,0.04631496950231102,0.5361881134721174,0.04813824100853229,0.5360780984719864,0.047352537903094946 +flat_mae,patch,logistic,ppmi_dx,55,0.005994842503189409,train,0.7206405693950177,0.017055474892069846,0.6727456151087274,0.021872038478482056,0.6678842860201242,0.01954321427119159 +flat_mae,patch,logistic,ppmi_dx,55,0.005994842503189409,test,0.67,0.04176721680935898,0.6239316239316239,0.050372856141707646,0.6218166383701189,0.04611201290814557 +flat_mae,patch,logistic,ppmi_dx,56,0.005994842503189409,train,0.7402135231316725,0.015820824540065663,0.6980540508714084,0.020042233021532376,0.6907380646542496,0.018302346020512797 +flat_mae,patch,logistic,ppmi_dx,56,0.005994842503189409,test,0.66,0.04237109864046482,0.609375,0.05028585650521563,0.6086587436332768,0.045657313171607615 +flat_mae,patch,logistic,ppmi_dx,57,0.046415888336127774,train,0.8149466192170819,0.014609657045975642,0.7937934830160456,0.016978744545333414,0.7827419182187968,0.016762047105506103 +flat_mae,patch,logistic,ppmi_dx,57,0.046415888336127774,test,0.65,0.04353192391797082,0.612789025334661,0.04938304663217563,0.6107809847198642,0.04700648474207538 +flat_mae,patch,logistic,ppmi_dx,58,0.005994842503189409,train,0.7366548042704626,0.015139501093413886,0.6910114863961246,0.019958482765929982,0.6843689788053949,0.01788547183398129 +flat_mae,patch,logistic,ppmi_dx,58,0.005994842503189409,test,0.71,0.040768276882890205,0.6695156695156695,0.04864778068931428,0.6642614601018676,0.04513659711290077 +flat_mae,patch,logistic,ppmi_dx,59,0.3593813663804626,train,0.9128113879003559,0.011725351669984367,0.9065935370515469,0.0127280350666942,0.9013594519374866,0.013380589949057062 +flat_mae,patch,logistic,ppmi_dx,59,0.3593813663804626,test,0.59,0.047593344072464586,0.5577607593571352,0.05131076440209389,0.5573005093378608,0.05028650863531308 +flat_mae,patch,logistic,ppmi_dx,60,0.046415888336127774,train,0.8149466192170819,0.014671928917402719,0.79630868740939,0.01657395982360239,0.7870905587668593,0.01642901293797292 +flat_mae,patch,logistic,ppmi_dx,60,0.046415888336127774,test,0.62,0.04394186614152841,0.5558672276764843,0.05276214146481755,0.5611205432937181,0.046962157640764335 +flat_mae,patch,logistic,ppmi_dx,61,0.005994842503189409,train,0.7402135231316725,0.016604289286657564,0.6961577201090177,0.02145231756864424,0.6889986084350246,0.019389491471302027 +flat_mae,patch,logistic,ppmi_dx,61,0.005994842503189409,test,0.65,0.0429848857157955,0.612789025334661,0.04852484341873178,0.6107809847198642,0.046242462602962005 +flat_mae,patch,logistic,ppmi_dx,62,0.3593813663804626,train,0.9199288256227758,0.011041274967864735,0.9138750114935483,0.012034132103349061,0.9071397987582959,0.012674976197067814 +flat_mae,patch,logistic,ppmi_dx,62,0.3593813663804626,test,0.59,0.04693516805125981,0.5523528769516323,0.05142590250979453,0.5522071307300509,0.04952405996118903 +flat_mae,patch,logistic,ppmi_dx,63,0.005994842503189409,train,0.7277580071174378,0.015477195228568061,0.6820926148442554,0.019817437637674445,0.6762738171697709,0.017754013953282414 +flat_mae,patch,logistic,ppmi_dx,63,0.005994842503189409,test,0.62,0.039223844788597674,0.5287698412698413,0.05232415599448484,0.5458404074702886,0.04265631967723569 +flat_mae,patch,logistic,ppmi_dx,64,0.005994842503189409,train,0.7455516014234875,0.016401974242702285,0.7056136559277643,0.020745793226683604,0.6976825090986941,0.01905946178918023 +flat_mae,patch,logistic,ppmi_dx,64,0.005994842503189409,test,0.6,0.04171213732236698,0.5324918186068257,0.04865512546920447,0.5398981324278438,0.04370677613830487 +flat_mae,patch,logistic,ppmi_dx,65,0.005994842503189409,train,0.7348754448398577,0.016749895446830955,0.6959879176453931,0.02040954122631872,0.6890119888674802,0.018917676644256323 +flat_mae,patch,logistic,ppmi_dx,65,0.005994842503189409,test,0.62,0.04122365825590932,0.5558672276764843,0.04926977798342412,0.5611205432937181,0.043565355522523504 +flat_mae,patch,logistic,ppmi_dx,66,0.046415888336127774,train,0.8256227758007118,0.014879132509584966,0.8071352532496638,0.017220051698429165,0.7966308071076857,0.017286559254634248 +flat_mae,patch,logistic,ppmi_dx,66,0.046415888336127774,test,0.57,0.04632328140363115,0.5242836596968692,0.05075447169625858,0.5258913412563667,0.048265201061921365 +flat_mae,patch,logistic,ppmi_dx,67,0.3593813663804626,train,0.9323843416370107,0.010264667907520493,0.9271981018872042,0.011302894600352772,0.9198645900235496,0.0123987492489573 +flat_mae,patch,logistic,ppmi_dx,67,0.3593813663804626,test,0.58,0.04885182084631032,0.5543293718166383,0.05082351472148338,0.5543293718166383,0.05091677709065629 +flat_mae,patch,logistic,ppmi_dx,68,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,68,10000.0,test,0.61,0.05032061605346262,0.5920075321686369,0.051428650607478595,0.5938030560271647,0.05176689104172444 +flat_mae,patch,logistic,ppmi_dx,69,0.046415888336127774,train,0.8042704626334519,0.015288073983154677,0.782987208110423,0.017527423342716345,0.7732016698779705,0.01728414647711358 +flat_mae,patch,logistic,ppmi_dx,69,0.046415888336127774,test,0.62,0.04601097260436906,0.5876736111111112,0.04963759833105474,0.5865874363327674,0.048547625740989156 +flat_mae,patch,logistic,ppmi_dx,70,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,70,21.54434690031882,test,0.58,0.05106965831097757,0.5716034271725826,0.051158644035989326,0.5797962648556876,0.052613433115693733 +flat_mae,patch,logistic,ppmi_dx,71,0.005994842503189409,train,0.7384341637010676,0.01655246410913626,0.6955142026293385,0.020872103118869088,0.6884232498394348,0.018950218174758792 +flat_mae,patch,logistic,ppmi_dx,71,0.005994842503189409,test,0.69,0.043330409644959496,0.6408295678368672,0.052917225951874224,0.6379456706281834,0.047832262720110166 +flat_mae,patch,logistic,ppmi_dx,72,0.005994842503189409,train,0.7437722419928826,0.016640622669166962,0.7003199431212228,0.0212699300550794,0.6927585099550417,0.019233785671805136 +flat_mae,patch,logistic,ppmi_dx,72,0.005994842503189409,test,0.65,0.04604420050342931,0.6178622120318812,0.04998652247559603,0.615874363327674,0.048639499402014535 +flat_mae,patch,logistic,ppmi_dx,73,0.005994842503189409,train,0.7437722419928826,0.01709853092758148,0.704002808988764,0.021422158674500532,0.6962374223934917,0.019722441622039663 +flat_mae,patch,logistic,ppmi_dx,73,0.005994842503189409,test,0.63,0.0434587804706943,0.5783475783475784,0.05049439762845675,0.5793718166383701,0.046402310907814125 +flat_mae,patch,logistic,ppmi_dx,74,0.005994842503189409,train,0.7437722419928826,0.017917216117502723,0.7066167833993127,0.021759650418835232,0.6988466067223293,0.020314583688510926 +flat_mae,patch,logistic,ppmi_dx,74,0.005994842503189409,test,0.56,0.042678537931845784,0.4944852941176471,0.0479215521185412,0.5025466893039049,0.043786950718445884 +flat_mae,patch,logistic,ppmi_dx,75,0.046415888336127774,train,0.8149466192170819,0.014920021666091333,0.7958219800181653,0.017052145898232063,0.7862208306572469,0.016981368236054468 +flat_mae,patch,logistic,ppmi_dx,75,0.046415888336127774,test,0.63,0.044584566836518666,0.5783475783475784,0.051846529612908386,0.5793718166383701,0.04755958083202702 +flat_mae,patch,logistic,ppmi_dx,76,0.3593813663804626,train,0.9234875444839857,0.010495153662624346,0.918190896934613,0.011346926639601554,0.9135088846071505,0.011979699586560639 +flat_mae,patch,logistic,ppmi_dx,76,0.3593813663804626,test,0.59,0.047723855669884845,0.5523528769516323,0.052014778171367994,0.5522071307300509,0.05008298206490761 +flat_mae,patch,logistic,ppmi_dx,77,0.3593813663804626,train,0.9323843416370107,0.010470302464477717,0.9277732683982685,0.011333043685414183,0.9233435024619996,0.012142764657826922 +flat_mae,patch,logistic,ppmi_dx,77,0.3593813663804626,test,0.71,0.045106097148833436,0.6791680495630048,0.05074919778132066,0.6744482173174873,0.048931426241086345 +flat_mae,patch,logistic,ppmi_dx,78,0.3593813663804626,train,0.9341637010676157,0.0099078587021619,0.9291861205613619,0.010839329943624507,0.9221794048383644,0.01172398672994165 +flat_mae,patch,logistic,ppmi_dx,78,0.3593813663804626,test,0.58,0.04724287882845413,0.5320855614973261,0.051529424081888174,0.533955857385399,0.049035123153245146 +flat_mae,patch,logistic,ppmi_dx,79,0.005994842503189409,train,0.7366548042704626,0.015370574356433655,0.6879651860744298,0.020169533813807376,0.6817597944765574,0.01785489448951675 +flat_mae,patch,logistic,ppmi_dx,79,0.005994842503189409,test,0.68,0.042518344276323825,0.6259934548854604,0.052485460827112064,0.6247877758913413,0.04677805674579786 +flat_mae,patch,logistic,ppmi_dx,80,0.005994842503189409,train,0.7259786476868327,0.01661207944144877,0.6784849250338054,0.020986061114048563,0.6730892742453436,0.0187700748573764 +flat_mae,patch,logistic,ppmi_dx,80,0.005994842503189409,test,0.69,0.04184542985799046,0.6408295678368672,0.05121785794997131,0.6379456706281834,0.04642764661876571 +flat_mae,patch,logistic,ppmi_dx,81,0.046415888336127774,train,0.7953736654804271,0.01602273242323407,0.7744964288580372,0.018256143253620176,0.7659762363519589,0.018055792484215943 +flat_mae,patch,logistic,ppmi_dx,81,0.046415888336127774,test,0.69,0.044027718541845874,0.6615351020853806,0.04874209097764598,0.6583191850594228,0.04756623660892718 +flat_mae,patch,logistic,ppmi_dx,82,0.005994842503189409,train,0.7277580071174378,0.016433104836354937,0.6820926148442554,0.02089786075495797,0.6762738171697709,0.018804829620971294 +flat_mae,patch,logistic,ppmi_dx,82,0.005994842503189409,test,0.67,0.03922805118789359,0.6033177064551027,0.05260958128177306,0.6065365025466893,0.04467356584724605 +flat_mae,patch,logistic,ppmi_dx,83,0.005994842503189409,train,0.7562277580071174,0.01635836032223931,0.7220784377425236,0.01969697326749279,0.713310854206808,0.01851368385991163 +flat_mae,patch,logistic,ppmi_dx,83,0.005994842503189409,test,0.57,0.047828443420207596,0.5305164319248826,0.05140070801507274,0.5309847198641766,0.04996796056960766 +flat_mae,patch,logistic,ppmi_dx,84,0.005994842503189409,train,0.7455516014234875,0.016303592241301252,0.7099066904903713,0.019988028077821834,0.7020311496467566,0.018748567215929532 +flat_mae,patch,logistic,ppmi_dx,84,0.005994842503189409,test,0.6,0.043561157009427566,0.5477159656264134,0.04997458906526712,0.5500848896434635,0.04632567818596821 +flat_mae,patch,logistic,ppmi_dx,85,0.046415888336127774,train,0.806049822064057,0.015183422804515727,0.7846932499165247,0.017525441943508684,0.7746467565831727,0.017232432330590734 +flat_mae,patch,logistic,ppmi_dx,85,0.046415888336127774,test,0.62,0.042369347410598626,0.5703301673450927,0.04786310297134437,0.5713073005093379,0.044713056243769395 +flat_mae,patch,logistic,ppmi_dx,86,0.005994842503189409,train,0.7348754448398577,0.016216779626036876,0.6913715387195336,0.020492185851059355,0.6846633483194177,0.018594509135994086 +flat_mae,patch,logistic,ppmi_dx,86,0.005994842503189409,test,0.66,0.038678050623060095,0.5783730158730158,0.05276757553143949,0.5882852292020373,0.043119798965421174 +flat_mae,patch,logistic,ppmi_dx,87,0.005994842503189409,train,0.7473309608540926,0.016164645957423198,0.7098458406050029,0.02022516837944074,0.7017367801327339,0.018797361152644826 +flat_mae,patch,logistic,ppmi_dx,87,0.005994842503189409,test,0.61,0.048247379203434464,0.568536342515765,0.054454547141360665,0.5683361629881154,0.05183958645243559 +flat_mae,patch,logistic,ppmi_dx,88,0.046415888336127774,train,0.8149466192170819,0.015501437453951317,0.7972582972582972,0.01744139474166176,0.7888300149860843,0.017398315322484627 +flat_mae,patch,logistic,ppmi_dx,88,0.046415888336127774,test,0.62,0.0412782703125991,0.5558672276764843,0.050708729047478904,0.5611205432937181,0.04464124587598062 +flat_mae,patch,logistic,ppmi_dx,89,0.005994842503189409,train,0.7313167259786477,0.01599905805157788,0.6881942071479223,0.020058568733694538,0.6817731749090131,0.01824327470696413 +flat_mae,patch,logistic,ppmi_dx,89,0.005994842503189409,test,0.66,0.0379469946109043,0.5952380952380952,0.04999469758801305,0.5984719864176571,0.042972322941283586 +flat_mae,patch,logistic,ppmi_dx,90,0.046415888336127774,train,0.8096085409252669,0.014951454903231229,0.7891655184261919,0.017255195236052758,0.7792763862128024,0.01710531407861963 +flat_mae,patch,logistic,ppmi_dx,90,0.046415888336127774,test,0.64,0.04497494413559621,0.6043956043956044,0.048734623331713134,0.6027164685908319,0.04690619405378731 +flat_mae,patch,logistic,ppmi_dx,91,0.046415888336127774,train,0.8167259786476868,0.016184456728815293,0.7965449976275416,0.01869402661532587,0.7859264611432242,0.018467538329630676 +flat_mae,patch,logistic,ppmi_dx,91,0.046415888336127774,test,0.64,0.04284603132146548,0.609375,0.04651477866994276,0.6078098471986417,0.04555886325102043 +flat_mae,patch,logistic,ppmi_dx,92,0.3593813663804626,train,0.9306049822064056,0.01087661102633042,0.9258010460569746,0.011766274693149769,0.9210286876471847,0.012572357589480197 +flat_mae,patch,logistic,ppmi_dx,92,0.3593813663804626,test,0.6,0.04459809412968227,0.570999570999571,0.04841844591623819,0.5704584040747029,0.04803997111007479 +flat_mae,patch,logistic,ppmi_dx,93,0.005994842503189409,train,0.7473309608540926,0.017021116824496762,0.7089866826144668,0.020682868512002798,0.7008670520231214,0.019110856767641113 +flat_mae,patch,logistic,ppmi_dx,93,0.005994842503189409,test,0.56,0.045014179988088196,0.5225694444444444,0.04830942714237806,0.5229202037351443,0.04712169261922371 +flat_mae,patch,logistic,ppmi_dx,94,0.046415888336127774,train,0.798932384341637,0.016369122668857778,0.777885035762377,0.018761810295652662,0.7688664097623634,0.018517251424379783 +flat_mae,patch,logistic,ppmi_dx,94,0.046415888336127774,test,0.71,0.04131684402274695,0.6791680495630048,0.04696064506373695,0.6744482173174873,0.04517428467458815 +flat_mae,patch,logistic,ppmi_dx,95,0.046415888336127774,train,0.8238434163701067,0.014810580710988158,0.8044461627682196,0.017168397305576004,0.7934462641832585,0.017096627685788405 +flat_mae,patch,logistic,ppmi_dx,95,0.046415888336127774,test,0.59,0.044525839688881776,0.539894512400404,0.05023487383565489,0.5420203735144312,0.04690450955038525 +flat_mae,patch,logistic,ppmi_dx,96,0.005994842503189409,train,0.7348754448398577,0.016671173556787777,0.6959879176453931,0.020668656967633606,0.6890119888674802,0.01912790671118804 +flat_mae,patch,logistic,ppmi_dx,96,0.005994842503189409,test,0.58,0.04383559740667395,0.5174632352941176,0.05000271304685942,0.5237691001697793,0.04574867003543929 +flat_mae,patch,logistic,ppmi_dx,97,0.005994842503189409,train,0.7313167259786477,0.01633186621236368,0.6881942071479223,0.020214960989664143,0.6817731749090131,0.018430366752712136 +flat_mae,patch,logistic,ppmi_dx,97,0.005994842503189409,test,0.73,0.03869852193559853,0.6815662224318906,0.05026915458821272,0.6752971137521222,0.04492819149236752 +flat_mae,patch,logistic,ppmi_dx,98,0.3593813663804626,train,0.9323843416370107,0.010272474234728202,0.9276332732423385,0.011117896986913144,0.9224737743523871,0.01186386510534796 +flat_mae,patch,logistic,ppmi_dx,98,0.3593813663804626,test,0.61,0.04516575694040786,0.568536342515765,0.05156648929500703,0.5683361629881154,0.04917756764149903 +flat_mae,patch,logistic,ppmi_dx,99,0.3593813663804626,train,0.9306049822064056,0.010572687218672105,0.9263555082168813,0.01127789630810297,0.9245076000856347,0.011735827454261572 +flat_mae,patch,logistic,ppmi_dx,99,0.3593813663804626,test,0.59,0.04854805454392585,0.5746446726838883,0.049456456621127,0.5776740237691002,0.05036630075560171 +flat_mae,patch,logistic,ppmi_dx,100,0.005994842503189409,train,0.7384341637010676,0.015839973583024147,0.6982897461461768,0.019652809910502396,0.6910324341682723,0.018080416878681164 +flat_mae,patch,logistic,ppmi_dx,100,0.005994842503189409,test,0.63,0.042207008896627596,0.5713127099988413,0.05163937268904518,0.5742784380305602,0.046224384246821015 diff --git a/data_scaling/n1600_1/eval_v2/ppmi_dx__patch__logistic/log.txt b/data_scaling/n1600_1/eval_v2/ppmi_dx__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..bc935ece36ae11c04f8014dd0ebbaf0de78f3895 --- /dev/null +++ b/data_scaling/n1600_1/eval_v2/ppmi_dx__patch__logistic/log.txt @@ -0,0 +1,247 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: 58906bf7243fb545e1349221e6921a1797e2e666, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-26 17:20:34 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n1600_1; eval v2 (ppmi_dx patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n1600_1/eval_v2/ppmi_dx__patch__logistic +model: flat_mae +representation: patch +dataset: ppmi_dx +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n1600_1/eval_v2/ppmi_dx__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: ppmi_dx (flat) +train (n=463): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'dir', 'sex', 'age', 'age_bin', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 463 +}), + labels=['PD' 'Prodromal'], + counts=[178 285] +) + +validation (n=99): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'dir', 'sex', 'age', 'age_bin', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 99 +}), + labels=['PD' 'Prodromal'], + counts=[39 60] +) + +test (n=100): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'dir', 'sex', 'age', 'age_bin', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 100 +}), + labels=['PD' 'Prodromal'], + counts=[37 63] +) + +extracting features for all splits +extract (train) [ 0/232] eta: 0:15:52 time: 4.1060 data: 3.1872 max mem: 2698 +extract (train) [ 20/232] eta: 0:01:17 time: 0.1797 data: 0.0614 max mem: 2851 +extract (train) [ 40/232] eta: 0:00:50 time: 0.1570 data: 0.0468 max mem: 2851 +extract (train) [ 60/232] eta: 0:00:39 time: 0.1631 data: 0.0516 max mem: 2851 +extract (train) [ 80/232] eta: 0:00:32 time: 0.1603 data: 0.0509 max mem: 2851 +extract (train) [100/232] eta: 0:00:27 time: 0.1688 data: 0.0546 max mem: 2851 +extract (train) [120/232] eta: 0:00:22 time: 0.1581 data: 0.0492 max mem: 2851 +extract (train) [140/232] eta: 0:00:17 time: 0.1564 data: 0.0475 max mem: 2851 +extract (train) [160/232] eta: 0:00:13 time: 0.1589 data: 0.0483 max mem: 2851 +extract (train) [180/232] eta: 0:00:09 time: 0.1517 data: 0.0450 max mem: 2851 +extract (train) [200/232] eta: 0:00:05 time: 0.1477 data: 0.0433 max mem: 2851 +extract (train) [220/232] eta: 0:00:02 time: 0.1627 data: 0.0530 max mem: 2851 +extract (train) [231/232] eta: 0:00:00 time: 0.1504 data: 0.0463 max mem: 2851 +extract (train) Total time: 0:00:41 (0.1783 s / it) +extract (validation) [ 0/50] eta: 0:03:03 time: 3.6745 data: 3.4655 max mem: 2851 +extract (validation) [20/50] eta: 0:00:11 time: 0.2054 data: 0.0709 max mem: 2851 +extract (validation) [40/50] eta: 0:00:02 time: 0.1432 data: 0.0387 max mem: 2851 +extract (validation) [49/50] eta: 0:00:00 time: 0.1437 data: 0.0416 max mem: 2851 +extract (validation) Total time: 0:00:12 (0.2442 s / it) +extract (test) [ 0/50] eta: 0:02:58 time: 3.5646 data: 3.4284 max mem: 2851 +extract (test) [20/50] eta: 0:00:11 time: 0.2158 data: 0.0773 max mem: 2851 +extract (test) [40/50] eta: 0:00:02 time: 0.1352 data: 0.0355 max mem: 2851 +extract (test) [49/50] eta: 0:00:00 time: 0.1459 data: 0.0431 max mem: 2851 +extract (test) Total time: 0:00:12 (0.2445 s / it) +feature extraction time: 0:01:05 +train features: (463, 768) +validation features: (99, 768) +test features: (100, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|----------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | ppmi_dx | | 0.0059948 | train | 0.74377 | 0.017238 | 0.70576 | 0.021082 | 0.69812 | 0.019485 | +| flat_mae | patch | logistic | ppmi_dx | | 0.0059948 | test | 0.66 | 0.035259 | 0.56875 | 0.049124 | 0.57958 | 0.039471 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 1, "C": 0.3593813663804626, "split": "test", "acc": 0.61, "acc_std": 0.04657585211244127, "f1": 0.5793334052421529, "f1_std": 0.05076462347024729, "bacc": 0.5785229202037352, "bacc_std": 0.050024494815787676} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 2, "C": 0.3593813663804626, "split": "test", "acc": 0.65, "acc_std": 0.046324939287601875, "f1": 0.6338529134846741, "f1_std": 0.04843507736701297, "bacc": 0.6362478777589134, "bacc_std": 0.04923399533444366} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 3, "C": 0.046415888336127774, "split": "test", "acc": 0.65, "acc_std": 0.049084950850540744, "f1": 0.630450849963045, "f1_std": 0.05155355772887281, "bacc": 0.6311544991511036, "bacc_std": 0.051790678743298575} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 4, "C": 0.005994842503189409, "split": "test", "acc": 0.71, "acc_std": 0.03985531834021653, "f1": 0.6745595331612613, "f1_std": 0.04647806575511383, "bacc": 0.6693548387096775, "bacc_std": 0.044073003065889876} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 5, "C": 0.3593813663804626, "split": "test", "acc": 0.61, "acc_std": 0.04910776313374495, "f1": 0.5793334052421529, "f1_std": 0.05246830153419495, "bacc": 0.5785229202037352, "bacc_std": 0.05154971096762268} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 6, "C": 0.046415888336127774, "split": "test", "acc": 0.66, "acc_std": 0.043933927664164055, "f1": 0.609375, "f1_std": 0.05325723969101963, "bacc": 0.6086587436332768, "bacc_std": 0.04859251463672096} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 7, "C": 0.005994842503189409, "split": "test", "acc": 0.63, "acc_std": 0.04629589614641885, "f1": 0.6009060511271707, "f1_std": 0.049341284629581, "bacc": 0.5997453310696095, "bacc_std": 0.04858673060721092} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 8, "C": 0.046415888336127774, "split": "test", "acc": 0.63, "acc_std": 0.043199981481477516, "f1": 0.5783475783475784, "f1_std": 0.05055903971893245, "bacc": 0.5793718166383701, "bacc_std": 0.04628881605684955} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 9, "C": 0.046415888336127774, "split": "test", "acc": 0.7, "acc_std": 0.0395913930040356, "f1": 0.6553308823529411, "f1_std": 0.048553197702691085, "bacc": 0.6511035653650254, "bacc_std": 0.04444857808079524} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 10, "C": 0.046415888336127774, "split": "test", "acc": 0.63, "acc_std": 0.04253516192516493, "f1": 0.5847828526540231, "f1_std": 0.04814450581702176, "bacc": 0.5844651952461799, "bacc_std": 0.04529359556340037} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 11, "C": 0.005994842503189409, "split": "test", "acc": 0.7, "acc_std": 0.040911900469178884, "f1": 0.6553308823529411, "f1_std": 0.05027566723226871, "bacc": 0.6511035653650254, "bacc_std": 0.04615250303031808} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 12, "C": 0.046415888336127774, "split": "test", "acc": 0.66, "acc_std": 0.03932333149670816, "f1": 0.5952380952380952, "f1_std": 0.04951542241207683, "bacc": 0.5984719864176571, "bacc_std": 0.04312967331998438} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 13, "C": 21.54434690031882, "split": "test", "acc": 0.61, "acc_std": 0.04793069580133382, "f1": 0.5741893219783819, "f1_std": 0.05184831910391855, "bacc": 0.5734295415959253, "bacc_std": 0.05026799577660129} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 14, "C": 0.046415888336127774, "split": "test", "acc": 0.63, "acc_std": 0.04402055883334514, "f1": 0.5713127099988413, "f1_std": 0.052974682897818676, "bacc": 0.5742784380305602, "bacc_std": 0.04749155712595072} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 15, "C": 0.046415888336127774, "split": "test", "acc": 0.65, "acc_std": 0.043483601506774945, "f1": 0.6072270227808326, "f1_std": 0.04960612012568691, "bacc": 0.6056876061120543, "bacc_std": 0.046483951635932254} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 16, "C": 1291.5496650148827, "split": "test", "acc": 0.61, "acc_std": 0.04822603446272563, "f1": 0.5882166613873931, "f1_std": 0.05022631046021138, "bacc": 0.5887096774193548, "bacc_std": 0.05062858093510596} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 17, "C": 2.782559402207126, "split": "test", "acc": 0.56, "acc_std": 0.04818213776909447, "f1": 0.5280995280995281, "f1_std": 0.05004368302174997, "bacc": 0.5280135823429541, "bacc_std": 0.04951026787993568} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 18, "C": 0.005994842503189409, "split": "test", "acc": 0.58, "acc_std": 0.04675637282766917, "f1": 0.525101763907734, "f1_std": 0.051617500646467036, "bacc": 0.5288624787775891, "bacc_std": 0.0481300689917646} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 19, "C": 0.3593813663804626, "split": "test", "acc": 0.56, "acc_std": 0.04821652828647039, "f1": 0.5452666391070691, "f1_std": 0.04931207792472886, "bacc": 0.5483870967741935, "bacc_std": 0.05069699630301417} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 20, "C": 0.046415888336127774, "split": "test", "acc": 0.62, "acc_std": 0.04198134823942652, "f1": 0.5634191176470589, "f1_std": 0.05006100586719623, "bacc": 0.566213921901528, "bacc_std": 0.045389328434490776} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 21, "C": 0.046415888336127774, "split": "test", "acc": 0.75, "acc_std": 0.03862783970143814, "f1": 0.7194478734148805, "f1_std": 0.046020339778095797, "bacc": 0.7117996604414262, "bacc_std": 0.04388800191613831} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 22, "C": 0.3593813663804626, "split": "test", "acc": 0.64, "acc_std": 0.045886224512374084, "f1": 0.609375, "f1_std": 0.04960413122133008, "bacc": 0.6078098471986417, "bacc_std": 0.048644847530646664} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 23, "C": 0.3593813663804626, "split": "test", "acc": 0.59, "acc_std": 0.04746301296799435, "f1": 0.5577607593571352, "f1_std": 0.051829222853880315, "bacc": 0.5573005093378608, "bacc_std": 0.05072215799779092} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 24, "C": 0.005994842503189409, "split": "test", "acc": 0.67, "acc_std": 0.04506384359994162, "f1": 0.6239316239316239, "f1_std": 0.05300692442838085, "bacc": 0.6218166383701189, "bacc_std": 0.049060753620487146} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 25, "C": 0.046415888336127774, "split": "test", "acc": 0.59, "acc_std": 0.03807237318581545, "f1": 0.5071523019593701, "f1_std": 0.049415883309391555, "bacc": 0.5216468590831919, "bacc_std": 0.041352327178294984} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 26, "C": 2.782559402207126, "split": "test", "acc": 0.65, "acc_std": 0.04559133250959003, "f1": 0.6224786970121885, "f1_std": 0.04928858107109426, "bacc": 0.6209677419354839, "bacc_std": 0.048476985163827334} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 27, "C": 0.3593813663804626, "split": "test", "acc": 0.56, "acc_std": 0.04769318609612907, "f1": 0.5331069609507639, "f1_std": 0.049626802306315526, "bacc": 0.5331069609507639, "bacc_std": 0.04945408200610041} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 28, "C": 0.3593813663804626, "split": "test", "acc": 0.68, "acc_std": 0.04409362312171681, "f1": 0.6604414261460102, "f1_std": 0.0467052166755184, "bacc": 0.6604414261460102, "bacc_std": 0.04650774182969822} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 29, "C": 0.046415888336127774, "split": "test", "acc": 0.69, "acc_std": 0.043067997399461225, "f1": 0.6467236467236468, "f1_std": 0.05087853709445476, "bacc": 0.6430390492359932, "bacc_std": 0.04712506738652775} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 30, "C": 0.005994842503189409, "split": "test", "acc": 0.68, "acc_std": 0.039638244158892805, "f1": 0.6114618746964546, "f1_std": 0.052507065459384675, "bacc": 0.6146010186757216, "bacc_std": 0.04464248726395057} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 31, "C": 2.782559402207126, "split": "test", "acc": 0.73, "acc_std": 0.04330267428231195, "f1": 0.7052079921388797, "f1_std": 0.04811900056834379, "bacc": 0.7007640067911716, "bacc_std": 0.04728903020090803} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 32, "C": 0.046415888336127774, "split": "test", "acc": 0.65, "acc_std": 0.047498311548938243, "f1": 0.6178622120318812, "f1_std": 0.0521888010466694, "bacc": 0.615874363327674, "bacc_std": 0.05073711253333573} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 33, "C": 2.782559402207126, "split": "test", "acc": 0.61, "acc_std": 0.04678331326445359, "f1": 0.5953937130407718, "f1_std": 0.047778236064547605, "bacc": 0.5988964346349746, "bacc_std": 0.048412857646781625} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 34, "C": 0.005994842503189409, "split": "test", "acc": 0.7, "acc_std": 0.0414789778080415, "f1": 0.66078697421981, "f1_std": 0.04907272849384992, "bacc": 0.6561969439728353, "bacc_std": 0.0462896824089861} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 35, "C": 2.782559402207126, "split": "test", "acc": 0.62, "acc_std": 0.04939607676728994, "f1": 0.6072757337742869, "f1_std": 0.05035850410573864, "bacc": 0.6120543293718166, "bacc_std": 0.051291506097500404} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 36, "C": 0.046415888336127774, "split": "test", "acc": 0.67, "acc_std": 0.0432086796836006, "f1": 0.6239316239316239, "f1_std": 0.05098854173256505, "bacc": 0.6218166383701189, "bacc_std": 0.04703630933881832} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 37, "C": 0.005994842503189409, "split": "test", "acc": 0.65, "acc_std": 0.044008617337971435, "f1": 0.6072270227808326, "f1_std": 0.05086361558435065, "bacc": 0.6056876061120543, "bacc_std": 0.04782298598452175} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 38, "C": 0.3593813663804626, "split": "test", "acc": 0.51, "acc_std": 0.05031262267065791, "f1": 0.4873940788785438, "f1_std": 0.05166238234843808, "bacc": 0.4876910016977929, "bacc_std": 0.05232252919018952} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 39, "C": 0.3593813663804626, "split": "test", "acc": 0.71, "acc_std": 0.04497811023153373, "f1": 0.6871966346672419, "f1_std": 0.04877804965365929, "bacc": 0.684634974533107, "bacc_std": 0.04814916433329234} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 40, "C": 2.782559402207126, "split": "test", "acc": 0.6, "acc_std": 0.048248423808452026, "f1": 0.586606035551881, "f1_std": 0.04935212502174878, "bacc": 0.5908319185059423, "bacc_std": 0.05051606991703183} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 41, "C": 166.81005372000556, "split": "test", "acc": 0.6, "acc_std": 0.04624499972970051, "f1": 0.5833333333333333, "f1_std": 0.04752848211787758, "bacc": 0.5857385398981324, "bacc_std": 0.048317275124008185} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 42, "C": 0.046415888336127774, "split": "test", "acc": 0.69, "acc_std": 0.03625383290081201, "f1": 0.627359057579036, "f1_std": 0.04801521268524075, "bacc": 0.6277589134125636, "bacc_std": 0.04120893235277005} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 43, "C": 0.005994842503189409, "split": "test", "acc": 0.66, "acc_std": 0.04172682111064776, "f1": 0.609375, "f1_std": 0.05053998134630239, "bacc": 0.6086587436332768, "bacc_std": 0.04600012741731729} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 44, "C": 0.3593813663804626, "split": "test", "acc": 0.6, "acc_std": 0.04467072419381624, "f1": 0.5659722222222222, "f1_std": 0.04938129915528078, "bacc": 0.565365025466893, "bacc_std": 0.04819709453599131} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 45, "C": 0.046415888336127774, "split": "test", "acc": 0.7, "acc_std": 0.039960860851588266, "f1": 0.6428571428571428, "f1_std": 0.052097715650376505, "bacc": 0.6409168081494058, "bacc_std": 0.0457763088294024} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 46, "C": 0.3593813663804626, "split": "test", "acc": 0.59, "acc_std": 0.04448066546264792, "f1": 0.5464100011063171, "f1_std": 0.048726322779253324, "bacc": 0.5471137521222411, "bacc_std": 0.04627956417959851} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 47, "C": 166.81005372000556, "split": "test", "acc": 0.64, "acc_std": 0.04864606458902919, "f1": 0.6216897856242118, "f1_std": 0.05125811738314717, "bacc": 0.6230899830220713, "bacc_std": 0.051853612662766856} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 48, "C": 0.005994842503189409, "split": "test", "acc": 0.73, "acc_std": 0.040806783749763965, "f1": 0.7012943909724527, "f1_std": 0.046831541148704754, "bacc": 0.6956706281833617, "bacc_std": 0.0453410560463028} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 49, "C": 0.3593813663804626, "split": "test", "acc": 0.68, "acc_std": 0.04522273322124614, "f1": 0.6567996567996568, "f1_std": 0.0490402176656509, "bacc": 0.6553480475382003, "bacc_std": 0.04860585591548598} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 50, "C": 1291.5496650148827, "split": "test", "acc": 0.57, "acc_std": 0.04726373239599259, "f1": 0.5413333333333333, "f1_std": 0.049196590634802945, "bacc": 0.5411714770797962, "bacc_std": 0.048898238646628005} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 51, "C": 0.3593813663804626, "split": "test", "acc": 0.62, "acc_std": 0.04749515764791186, "f1": 0.5876736111111112, "f1_std": 0.0515213030503951, "bacc": 0.5865874363327674, "bacc_std": 0.05026524302414357} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 52, "C": 0.3593813663804626, "split": "test", "acc": 0.57, "acc_std": 0.04954372210482373, "f1": 0.5501621508525996, "f1_std": 0.05124321190346506, "bacc": 0.551358234295416, "bacc_std": 0.05189666940914281} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 53, "C": 0.3593813663804626, "split": "test", "acc": 0.66, "acc_std": 0.04408698674212153, "f1": 0.6263736263736264, "f1_std": 0.048899709396922904, "bacc": 0.6239388794567062, "bacc_std": 0.047114459652987405} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 54, "C": 0.046415888336127774, "split": "test", "acc": 0.57, "acc_std": 0.04631496950231102, "f1": 0.5361881134721174, "f1_std": 0.04813824100853229, "bacc": 0.5360780984719864, "bacc_std": 0.047352537903094946} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 55, "C": 0.005994842503189409, "split": "test", "acc": 0.67, "acc_std": 0.04176721680935898, "f1": 0.6239316239316239, "f1_std": 0.050372856141707646, "bacc": 0.6218166383701189, "bacc_std": 0.04611201290814557} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 56, "C": 0.005994842503189409, "split": "test", "acc": 0.66, "acc_std": 0.04237109864046482, "f1": 0.609375, "f1_std": 0.05028585650521563, "bacc": 0.6086587436332768, "bacc_std": 0.045657313171607615} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 57, "C": 0.046415888336127774, "split": "test", "acc": 0.65, "acc_std": 0.04353192391797082, "f1": 0.612789025334661, "f1_std": 0.04938304663217563, "bacc": 0.6107809847198642, "bacc_std": 0.04700648474207538} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 58, "C": 0.005994842503189409, "split": "test", "acc": 0.71, "acc_std": 0.040768276882890205, "f1": 0.6695156695156695, "f1_std": 0.04864778068931428, "bacc": 0.6642614601018676, "bacc_std": 0.04513659711290077} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 59, "C": 0.3593813663804626, "split": "test", "acc": 0.59, "acc_std": 0.047593344072464586, "f1": 0.5577607593571352, "f1_std": 0.05131076440209389, "bacc": 0.5573005093378608, "bacc_std": 0.05028650863531308} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 60, "C": 0.046415888336127774, "split": "test", "acc": 0.62, "acc_std": 0.04394186614152841, "f1": 0.5558672276764843, "f1_std": 0.05276214146481755, "bacc": 0.5611205432937181, "bacc_std": 0.046962157640764335} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 61, "C": 0.005994842503189409, "split": "test", "acc": 0.65, "acc_std": 0.0429848857157955, "f1": 0.612789025334661, "f1_std": 0.04852484341873178, "bacc": 0.6107809847198642, "bacc_std": 0.046242462602962005} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 62, "C": 0.3593813663804626, "split": "test", "acc": 0.59, "acc_std": 0.04693516805125981, "f1": 0.5523528769516323, "f1_std": 0.05142590250979453, "bacc": 0.5522071307300509, "bacc_std": 0.04952405996118903} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 63, "C": 0.005994842503189409, "split": "test", "acc": 0.62, "acc_std": 0.039223844788597674, "f1": 0.5287698412698413, "f1_std": 0.05232415599448484, "bacc": 0.5458404074702886, "bacc_std": 0.04265631967723569} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 64, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.04171213732236698, "f1": 0.5324918186068257, "f1_std": 0.04865512546920447, "bacc": 0.5398981324278438, "bacc_std": 0.04370677613830487} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 65, "C": 0.005994842503189409, "split": "test", "acc": 0.62, "acc_std": 0.04122365825590932, "f1": 0.5558672276764843, "f1_std": 0.04926977798342412, "bacc": 0.5611205432937181, "bacc_std": 0.043565355522523504} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 66, "C": 0.046415888336127774, "split": "test", "acc": 0.57, "acc_std": 0.04632328140363115, "f1": 0.5242836596968692, "f1_std": 0.05075447169625858, "bacc": 0.5258913412563667, "bacc_std": 0.048265201061921365} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 67, "C": 0.3593813663804626, "split": "test", "acc": 0.58, "acc_std": 0.04885182084631032, "f1": 0.5543293718166383, "f1_std": 0.05082351472148338, "bacc": 0.5543293718166383, "bacc_std": 0.05091677709065629} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 68, "C": 10000.0, "split": "test", "acc": 0.61, "acc_std": 0.05032061605346262, "f1": 0.5920075321686369, "f1_std": 0.051428650607478595, "bacc": 0.5938030560271647, "bacc_std": 0.05176689104172444} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 69, "C": 0.046415888336127774, "split": "test", "acc": 0.62, "acc_std": 0.04601097260436906, "f1": 0.5876736111111112, "f1_std": 0.04963759833105474, "bacc": 0.5865874363327674, "bacc_std": 0.048547625740989156} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 70, "C": 21.54434690031882, "split": "test", "acc": 0.58, "acc_std": 0.05106965831097757, "f1": 0.5716034271725826, "f1_std": 0.051158644035989326, "bacc": 0.5797962648556876, "bacc_std": 0.052613433115693733} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 71, "C": 0.005994842503189409, "split": "test", "acc": 0.69, "acc_std": 0.043330409644959496, "f1": 0.6408295678368672, "f1_std": 0.052917225951874224, "bacc": 0.6379456706281834, "bacc_std": 0.047832262720110166} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 72, "C": 0.005994842503189409, "split": "test", "acc": 0.65, "acc_std": 0.04604420050342931, "f1": 0.6178622120318812, "f1_std": 0.04998652247559603, "bacc": 0.615874363327674, "bacc_std": 0.048639499402014535} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 73, "C": 0.005994842503189409, "split": "test", "acc": 0.63, "acc_std": 0.0434587804706943, "f1": 0.5783475783475784, "f1_std": 0.05049439762845675, "bacc": 0.5793718166383701, "bacc_std": 0.046402310907814125} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 74, "C": 0.005994842503189409, "split": "test", "acc": 0.56, "acc_std": 0.042678537931845784, "f1": 0.4944852941176471, "f1_std": 0.0479215521185412, "bacc": 0.5025466893039049, "bacc_std": 0.043786950718445884} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 75, "C": 0.046415888336127774, "split": "test", "acc": 0.63, "acc_std": 0.044584566836518666, "f1": 0.5783475783475784, "f1_std": 0.051846529612908386, "bacc": 0.5793718166383701, "bacc_std": 0.04755958083202702} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 76, "C": 0.3593813663804626, "split": "test", "acc": 0.59, "acc_std": 0.047723855669884845, "f1": 0.5523528769516323, "f1_std": 0.052014778171367994, "bacc": 0.5522071307300509, "bacc_std": 0.05008298206490761} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 77, "C": 0.3593813663804626, "split": "test", "acc": 0.71, "acc_std": 0.045106097148833436, "f1": 0.6791680495630048, "f1_std": 0.05074919778132066, "bacc": 0.6744482173174873, "bacc_std": 0.048931426241086345} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 78, "C": 0.3593813663804626, "split": "test", "acc": 0.58, "acc_std": 0.04724287882845413, "f1": 0.5320855614973261, "f1_std": 0.051529424081888174, "bacc": 0.533955857385399, "bacc_std": 0.049035123153245146} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 79, "C": 0.005994842503189409, "split": "test", "acc": 0.68, "acc_std": 0.042518344276323825, "f1": 0.6259934548854604, "f1_std": 0.052485460827112064, "bacc": 0.6247877758913413, "bacc_std": 0.04677805674579786} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 80, "C": 0.005994842503189409, "split": "test", "acc": 0.69, "acc_std": 0.04184542985799046, "f1": 0.6408295678368672, "f1_std": 0.05121785794997131, "bacc": 0.6379456706281834, "bacc_std": 0.04642764661876571} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 81, "C": 0.046415888336127774, "split": "test", "acc": 0.69, "acc_std": 0.044027718541845874, "f1": 0.6615351020853806, "f1_std": 0.04874209097764598, "bacc": 0.6583191850594228, "bacc_std": 0.04756623660892718} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 82, "C": 0.005994842503189409, "split": "test", "acc": 0.67, "acc_std": 0.03922805118789359, "f1": 0.6033177064551027, "f1_std": 0.05260958128177306, "bacc": 0.6065365025466893, "bacc_std": 0.04467356584724605} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 83, "C": 0.005994842503189409, "split": "test", "acc": 0.57, "acc_std": 0.047828443420207596, "f1": 0.5305164319248826, "f1_std": 0.05140070801507274, "bacc": 0.5309847198641766, "bacc_std": 0.04996796056960766} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 84, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.043561157009427566, "f1": 0.5477159656264134, "f1_std": 0.04997458906526712, "bacc": 0.5500848896434635, "bacc_std": 0.04632567818596821} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 85, "C": 0.046415888336127774, "split": "test", "acc": 0.62, "acc_std": 0.042369347410598626, "f1": 0.5703301673450927, "f1_std": 0.04786310297134437, "bacc": 0.5713073005093379, "bacc_std": 0.044713056243769395} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 86, "C": 0.005994842503189409, "split": "test", "acc": 0.66, "acc_std": 0.038678050623060095, "f1": 0.5783730158730158, "f1_std": 0.05276757553143949, "bacc": 0.5882852292020373, "bacc_std": 0.043119798965421174} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 87, "C": 0.005994842503189409, "split": "test", "acc": 0.61, "acc_std": 0.048247379203434464, "f1": 0.568536342515765, "f1_std": 0.054454547141360665, "bacc": 0.5683361629881154, "bacc_std": 0.05183958645243559} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 88, "C": 0.046415888336127774, "split": "test", "acc": 0.62, "acc_std": 0.0412782703125991, "f1": 0.5558672276764843, "f1_std": 0.050708729047478904, "bacc": 0.5611205432937181, "bacc_std": 0.04464124587598062} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 89, "C": 0.005994842503189409, "split": "test", "acc": 0.66, "acc_std": 0.0379469946109043, "f1": 0.5952380952380952, "f1_std": 0.04999469758801305, "bacc": 0.5984719864176571, "bacc_std": 0.042972322941283586} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 90, "C": 0.046415888336127774, "split": "test", "acc": 0.64, "acc_std": 0.04497494413559621, "f1": 0.6043956043956044, "f1_std": 0.048734623331713134, "bacc": 0.6027164685908319, "bacc_std": 0.04690619405378731} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 91, "C": 0.046415888336127774, "split": "test", "acc": 0.64, "acc_std": 0.04284603132146548, "f1": 0.609375, "f1_std": 0.04651477866994276, "bacc": 0.6078098471986417, "bacc_std": 0.04555886325102043} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 92, "C": 0.3593813663804626, "split": "test", "acc": 0.6, "acc_std": 0.04459809412968227, "f1": 0.570999570999571, "f1_std": 0.04841844591623819, "bacc": 0.5704584040747029, "bacc_std": 0.04803997111007479} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 93, "C": 0.005994842503189409, "split": "test", "acc": 0.56, "acc_std": 0.045014179988088196, "f1": 0.5225694444444444, "f1_std": 0.04830942714237806, "bacc": 0.5229202037351443, "bacc_std": 0.04712169261922371} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 94, "C": 0.046415888336127774, "split": "test", "acc": 0.71, "acc_std": 0.04131684402274695, "f1": 0.6791680495630048, "f1_std": 0.04696064506373695, "bacc": 0.6744482173174873, "bacc_std": 0.04517428467458815} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 95, "C": 0.046415888336127774, "split": "test", "acc": 0.59, "acc_std": 0.044525839688881776, "f1": 0.539894512400404, "f1_std": 0.05023487383565489, "bacc": 0.5420203735144312, "bacc_std": 0.04690450955038525} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 96, "C": 0.005994842503189409, "split": "test", "acc": 0.58, "acc_std": 0.04383559740667395, "f1": 0.5174632352941176, "f1_std": 0.05000271304685942, "bacc": 0.5237691001697793, "bacc_std": 0.04574867003543929} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 97, "C": 0.005994842503189409, "split": "test", "acc": 0.73, "acc_std": 0.03869852193559853, "f1": 0.6815662224318906, "f1_std": 0.05026915458821272, "bacc": 0.6752971137521222, "bacc_std": 0.04492819149236752} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 98, "C": 0.3593813663804626, "split": "test", "acc": 0.61, "acc_std": 0.04516575694040786, "f1": 0.568536342515765, "f1_std": 0.05156648929500703, "bacc": 0.5683361629881154, "bacc_std": 0.04917756764149903} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 99, "C": 0.3593813663804626, "split": "test", "acc": 0.59, "acc_std": 0.04854805454392585, "f1": 0.5746446726838883, "f1_std": 0.049456456621127, "bacc": 0.5776740237691002, "bacc_std": 0.05036630075560171} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 100, "C": 0.005994842503189409, "split": "test", "acc": 0.63, "acc_std": 0.042207008896627596, "f1": 0.5713127099988413, "f1_std": 0.05163937268904518, "bacc": 0.5742784380305602, "bacc_std": 0.046224384246821015} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | ppmi_dx | train | 100 | 129.87 | 1013.6 | 0.84007 | 0.094713 | 0.81917 | 0.1106 | 0.81273 | 0.11263 | +| flat_mae | patch | logistic | ppmi_dx | test | 100 | 129.87 | 1013.6 | 0.6342 | 0.048267 | 0.5938 | 0.049532 | 0.59422 | 0.047006 | + + +done! total time: 0:05:13 diff --git a/data_scaling/n1600_1/pretrain/config.yaml b/data_scaling/n1600_1/pretrain/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f4badb23529dd5cb5c1af7ced7062d02bf7ea675 --- /dev/null +++ b/data_scaling/n1600_1/pretrain/config.yaml @@ -0,0 +1,109 @@ +name: data_scaling/n1600_1/pretrain +notes: data scaling experiment n1600_1 (seed=1644) +output_dir: experiments/data_scaling/output/data_scaling/n1600_1/pretrain +input_space: flat +patch_size: 16 +num_frames: 16 +t_patch_size: 4 +mask_ratio: 0.9 +pred_mask_ratio: null +masking: tube +masking_kwargs: {} +mask_patch_size: null +model: mae_vit_base +model_kwargs: + decoding: attn + pos_embed: sep + target_norm: null + pca_norm_nc: 2 + t_pred_stride: 2 + no_decode_pos: true + mask_drop_scale: false + pred_edge_pad: 0 + gauss_sigma: null + class_token: true + reg_tokens: 0 + no_embed_class: true + head_init_scale: 0.0 + decoder_depth: 4 + drop_path_rate: 0.0 +datasets: + hcp-train: + type: wds + url: /data/fmri-datasets/pretrain/hcpya-all.flat.wds/hcpya-all-flat-{00000..01599}.tar + clipping: random + clipping_kwargs: + oversample: 4.0 + shuffle: true + buffer_size: 2000 + samples_per_epoch: 200000 + hcp-train-subset: + type: arrow + root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/train + split_range: + - 0 + - 2000 + shuffle: false + hcp-val: + type: arrow + root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/test + split_range: + - 0 + - 2000 + shuffle: false + nsd-val: + type: arrow + root: s3://medarc/fmri-datasets/eval/nsd-cococlip.${input_space}.arrow/testid + split_range: + - 0 + - 2000 + shuffle: false +train_dataset: hcp-train +eval_datasets: +- hcp-train-subset +- hcp-val +- nsd-val +val_dataset: hcp-val +clip_vmax: 3.0 +normalize: frame +tr_scale: null +crop_scale: null +crop_aspect: null +gray_jitter: null +num_workers: 16 +epochs: 100 +batch_size: 32 +accum_iter: 1 +base_lr: 0.001 +min_lr: 0.0 +warmup_epochs: 5 +weight_decay: 0.05 +betas: +- 0.9 +- 0.95 +clip_grad: 1.0 +amp: true +amp_dtype: float16 +ckpt: null +resume: true +auto_resume: true +start_epoch: 0 +max_checkpoints: 20 +checkpoint_period: 5 +plot_period: 5 +device: cuda +presend_cuda: false +seed: 1644 +debug: false +wandb: true +wandb_entity: null +wandb_project: fMRI-foundation-model +rank: 0 +world_size: 1 +gpu: 0 +distributed: true +dist_backend: nccl +in_chans: 1 +img_size: +- 224 +- 560 diff --git a/data_scaling/n1600_1/pretrain/log.json b/data_scaling/n1600_1/pretrain/log.json new file mode 100644 index 0000000000000000000000000000000000000000..07307630f518af214c933389ca695d031a838b4b --- /dev/null +++ b/data_scaling/n1600_1/pretrain/log.json @@ -0,0 +1,100 @@ +{"epoch": 0, "train/lr": 1.2502400076802458e-05, "train/grad": 0.05545806923270225, "train/loss": 0.9930883884525299, "eval/hcp-train-subset/loss": 0.9889252128139618, "eval/hcp-val/loss": 0.9894907484131474, "eval/nsd-val/loss": 0.9888719656775075} +{"epoch": 1, "train/lr": 3.750320010240327e-05, "train/grad": 0.08286450159043074, "train/loss": 0.9878297120475769, "eval/hcp-train-subset/loss": 0.9860129067974706, "eval/hcp-val/loss": 0.9858945867707652, "eval/nsd-val/loss": 0.9874046858279936} +{"epoch": 2, "train/lr": 6.250400012800409e-05, "train/grad": 0.10217893488641946, "train/loss": 0.9843060559940339, "eval/hcp-train-subset/loss": 0.9821028171047088, "eval/hcp-val/loss": 0.9814827105691356, "eval/nsd-val/loss": 0.983334009685824} +{"epoch": 3, "train/lr": 8.75048001536049e-05, "train/grad": 0.19414816269765534, "train/loss": 0.9765993752479554, "eval/hcp-train-subset/loss": 0.964917536704771, "eval/hcp-val/loss": 0.9656026334531845, "eval/nsd-val/loss": 0.9546214428640181} +{"epoch": 4, "train/lr": 0.00011250559953918529, "train/grad": 0.23641702287278418, "train/loss": 0.9444326673889161, "eval/hcp-train-subset/loss": 0.9198482151954405, "eval/hcp-val/loss": 0.918366321632939, "eval/nsd-val/loss": 0.8879416806082572} +{"epoch": 5, "train/lr": 0.00012498860637884563, "train/grad": 0.1762712506789633, "train/loss": 0.9050303762531281, "eval/hcp-train-subset/loss": 0.8821757801117436, "eval/hcp-val/loss": 0.8804352975660755, "eval/nsd-val/loss": 0.8465685267602244} +{"epoch": 6, "train/lr": 0.0001249202705377922, "train/grad": 0.11209929132865082, "train/loss": 0.8762222838401794, "eval/hcp-train-subset/loss": 0.8676259902215773, "eval/hcp-val/loss": 0.8657109439373016, "eval/nsd-val/loss": 0.8323166783778898} +{"epoch": 7, "train/lr": 0.0001247836790473516, "train/grad": 0.08783680580022164, "train/loss": 0.8643819067573547, "eval/hcp-train-subset/loss": 0.8597344919558494, "eval/hcp-val/loss": 0.8586478646724455, "eval/nsd-val/loss": 0.8258990453135583} +{"epoch": 8, "train/lr": 0.000124578981268311, "train/grad": 0.07811217500862645, "train/loss": 0.8557559607410431, "eval/hcp-train-subset/loss": 0.8556019644583425, "eval/hcp-val/loss": 0.8547820237375074, "eval/nsd-val/loss": 0.8205708899805623} +{"epoch": 9, "train/lr": 0.00012430640103468907, "train/grad": 0.07048717211399419, "train/loss": 0.8545962369441986, "eval/hcp-train-subset/loss": 0.8526738036063409, "eval/hcp-val/loss": 0.8517135506676089, "eval/nsd-val/loss": 0.8175800577286751} +{"epoch": 10, "train/lr": 0.00012396623640896796, "train/grad": 0.06803832967287447, "train/loss": 0.8508944048786163, "eval/hcp-train-subset/loss": 0.8497168508268171, "eval/hcp-val/loss": 0.8488818945423249, "eval/nsd-val/loss": 0.8188272310841468} +{"epoch": 11, "train/lr": 0.0001235588593561712, "train/grad": 0.06587930969600583, "train/loss": 0.8485883133983612, "eval/hcp-train-subset/loss": 0.8483460882017689, "eval/hcp-val/loss": 0.8473753948365489, "eval/nsd-val/loss": 0.8184263956162238} +{"epoch": 12, "train/lr": 0.00012308471533712604, "train/grad": 0.06557622207361481, "train/loss": 0.8458848006725311, "eval/hcp-train-subset/loss": 0.8475507824651657, "eval/hcp-val/loss": 0.8467946312119884, "eval/nsd-val/loss": 0.8177478342287002} +{"epoch": 13, "train/lr": 0.00012254432282135565, "train/grad": 0.06458207453742967, "train/loss": 0.844537647151947, "eval/hcp-train-subset/loss": 0.8458217593931383, "eval/hcp-val/loss": 0.8448929844364044, "eval/nsd-val/loss": 0.81584805826987} +{"epoch": 14, "train/lr": 0.00012193827272014171, "train/grad": 0.06531056473639801, "train/loss": 0.8413428853034973, "eval/hcp-train-subset/loss": 0.8452085689190896, "eval/hcp-val/loss": 0.844361214868484, "eval/nsd-val/loss": 0.8128735644202079} +{"epoch": 15, "train/lr": 0.00012126722774037197, "train/grad": 0.06426782705898608, "train/loss": 0.8410958337402343, "eval/hcp-train-subset/loss": 0.8438764358720472, "eval/hcp-val/loss": 0.8435689374323814, "eval/nsd-val/loss": 0.8121562917386332} +{"epoch": 16, "train/lr": 0.00012053192165988122, "train/grad": 0.06471067073894955, "train/loss": 0.84128668217659, "eval/hcp-train-subset/loss": 0.8435015149654881, "eval/hcp-val/loss": 0.8430281012288986, "eval/nsd-val/loss": 0.8138134229567743} +{"epoch": 17, "train/lr": 0.00011973315852507104, "train/grad": 0.06448625176477218, "train/loss": 0.839119826412201, "eval/hcp-train-subset/loss": 0.841947139270844, "eval/hcp-val/loss": 0.8419165082516209, "eval/nsd-val/loss": 0.813897531840109} +{"epoch": 18, "train/lr": 0.00011887181177170142, "train/grad": 0.06509696488076686, "train/loss": 0.8394593396377563, "eval/hcp-train-subset/loss": 0.8409752672718417, "eval/hcp-val/loss": 0.8414447067245361, "eval/nsd-val/loss": 0.8135832403936694} +{"epoch": 19, "train/lr": 0.00011794882326980209, "train/grad": 0.06545057254007802, "train/loss": 0.8365891694641113, "eval/hcp-train-subset/loss": 0.8410299199242746, "eval/hcp-val/loss": 0.8415041700486214, "eval/nsd-val/loss": 0.8118040677039854} +{"epoch": 20, "train/lr": 0.00011696520229374954, "train/grad": 0.06679961962823068, "train/loss": 0.835566320066452, "eval/hcp-train-subset/loss": 0.8404919851210809, "eval/hcp-val/loss": 0.8418048850951656, "eval/nsd-val/loss": 0.8142738044261932} +{"epoch": 21, "train/lr": 0.00011592202441863837, "train/grad": 0.06814518323461508, "train/loss": 0.8356289879131317, "eval/hcp-train-subset/loss": 0.8402332038648667, "eval/hcp-val/loss": 0.8410586695517263, "eval/nsd-val/loss": 0.8121104932600453} +{"epoch": 22, "train/lr": 0.00011482043034415979, "train/grad": 0.06791599852269853, "train/loss": 0.8356523753547669, "eval/hcp-train-subset/loss": 0.8401786175466353, "eval/hcp-val/loss": 0.8403567652548513, "eval/nsd-val/loss": 0.811907087602923} +{"epoch": 23, "train/lr": 0.00011366162464726024, "train/grad": 0.06845815567833739, "train/loss": 0.8345672989368439, "eval/hcp-train-subset/loss": 0.8406239594182661, "eval/hcp-val/loss": 0.8408858112750515, "eval/nsd-val/loss": 0.8128614233386132} +{"epoch": 24, "train/lr": 0.0001124468744649569, "train/grad": 0.06919807704611564, "train/loss": 0.8329862614154816, "eval/hcp-train-subset/loss": 0.8389943684301069, "eval/hcp-val/loss": 0.8398811922919366, "eval/nsd-val/loss": 0.8122480607801869} +{"epoch": 25, "train/lr": 0.0001111775081087387, "train/grad": 0.06972174965222003, "train/loss": 0.8322967049789429, "eval/hcp-train-subset/loss": 0.8384640101463564, "eval/hcp-val/loss": 0.8402991323701797, "eval/nsd-val/loss": 0.8137914730656531} +{"epoch": 26, "train/lr": 0.0001098549136120796, "train/grad": 0.06965054734367761, "train/loss": 0.8316056953334808, "eval/hcp-train-subset/loss": 0.8386012661841608, "eval/hcp-val/loss": 0.8394260473789708, "eval/nsd-val/loss": 0.8124627874743554} +{"epoch": 27, "train/lr": 0.00010848053721264312, "train/grad": 0.07216839120090103, "train/loss": 0.8300705437088013, "eval/hcp-train-subset/loss": 0.838945452244051, "eval/hcp-val/loss": 0.839992921198568, "eval/nsd-val/loss": 0.813593368376455} +{"epoch": 28, "train/lr": 0.00010705588177084458, "train/grad": 0.07131247089262464, "train/loss": 0.8321171138572693, "eval/hcp-train-subset/loss": 0.8375101118318496, "eval/hcp-val/loss": 0.8397253742141109, "eval/nsd-val/loss": 0.8128123235317969} +{"epoch": 29, "train/lr": 0.00010558250512649171, "train/grad": 0.07260122727340376, "train/loss": 0.8293428528308868, "eval/hcp-train-subset/loss": 0.8384848650424711, "eval/hcp-val/loss": 0.8392143701353381, "eval/nsd-val/loss": 0.8115945083479728} +{"epoch": 30, "train/lr": 0.00010406201839531515, "train/grad": 0.07419106442059144, "train/loss": 0.8286445176506042, "eval/hcp-train-subset/loss": 0.83750170277011, "eval/hcp-val/loss": 0.8389752661028216, "eval/nsd-val/loss": 0.8109349320011754} +{"epoch": 31, "train/lr": 0.00010249608420723018, "train/grad": 0.07641159152855355, "train/loss": 0.8258021451568603, "eval/hcp-train-subset/loss": 0.8366419970989227, "eval/hcp-val/loss": 0.8388368025902779, "eval/nsd-val/loss": 0.8129734493071034} +{"epoch": 32, "train/lr": 0.00010088641488828097, "train/grad": 0.07667376090760077, "train/loss": 0.8260023865795135, "eval/hcp-train-subset/loss": 0.8374284534685074, "eval/hcp-val/loss": 0.8389437852367279, "eval/nsd-val/loss": 0.8145863346515163} +{"epoch": 33, "train/lr": 9.923477058823526e-05, "train/grad": 0.07514876812856248, "train/loss": 0.829807050409317, "eval/hcp-train-subset/loss": 0.8365410806671265, "eval/hcp-val/loss": 0.8388299663220683, "eval/nsd-val/loss": 0.8133123238240519} +{"epoch": 34, "train/lr": 9.754295735588547e-05, "train/grad": 0.07770039732764725, "train/loss": 0.8258356702423095, "eval/hcp-train-subset/loss": 0.8363725485340241, "eval/hcp-val/loss": 0.8391252390799984, "eval/nsd-val/loss": 0.8154099324057179} +{"epoch": 35, "train/lr": 9.581282516416285e-05, "train/grad": 0.0814709539950247, "train/loss": 0.823426164598465, "eval/hcp-train-subset/loss": 0.8358779232348165, "eval/hcp-val/loss": 0.8379972711686166, "eval/nsd-val/loss": 0.8121218142970916} +{"epoch": 36, "train/lr": 9.404626588721676e-05, "train/grad": 0.07903308177273839, "train/loss": 0.8261655047988892, "eval/hcp-train-subset/loss": 0.836035224699205, "eval/hcp-val/loss": 0.8380528246202776, "eval/nsd-val/loss": 0.8116352308181024} +{"epoch": 37, "train/lr": 9.224521123168153e-05, "train/grad": 0.08034719335523838, "train/loss": 0.8253982328319549, "eval/hcp-train-subset/loss": 0.8357691274535272, "eval/hcp-val/loss": 0.8385408780267162, "eval/nsd-val/loss": 0.8111752492766227} +{"epoch": 38, "train/lr": 9.041163062437843e-05, "train/grad": 0.08262559820198528, "train/loss": 0.8228892577457428, "eval/hcp-train-subset/loss": 0.8351409819818312, "eval/hcp-val/loss": 0.8373288156524781, "eval/nsd-val/loss": 0.8094300935345311} +{"epoch": 39, "train/lr": 8.85475290587822e-05, "train/grad": 0.08277864003774928, "train/loss": 0.8240690303230286, "eval/hcp-train-subset/loss": 0.8342907995946945, "eval/hcp-val/loss": 0.8374381065368652, "eval/nsd-val/loss": 0.8091753077122473} +{"epoch": 40, "train/lr": 8.665494490258622e-05, "train/grad": 0.08392124131164501, "train/loss": 0.8219595784282684, "eval/hcp-train-subset/loss": 0.8348419916245245, "eval/hcp-val/loss": 0.8380500001292075, "eval/nsd-val/loss": 0.8128880050874525} +{"epoch": 41, "train/lr": 8.473594766877838e-05, "train/grad": 0.08357138202970468, "train/loss": 0.8238390454006195, "eval/hcp-train-subset/loss": 0.8346381764258107, "eval/hcp-val/loss": 0.8372843496261104, "eval/nsd-val/loss": 0.8118933900710075} +{"epoch": 42, "train/lr": 8.279263575265999e-05, "train/grad": 0.0852401707335121, "train/loss": 0.8206738440132141, "eval/hcp-train-subset/loss": 0.8339106209816471, "eval/hcp-val/loss": 0.8370064352789233, "eval/nsd-val/loss": 0.8113252453265651} +{"epoch": 43, "train/lr": 8.082713413727944e-05, "train/grad": 0.08610054351050746, "train/loss": 0.8224782637023926, "eval/hcp-train-subset/loss": 0.8341090121576863, "eval/hcp-val/loss": 0.8374893021199011, "eval/nsd-val/loss": 0.8090211729849538} +{"epoch": 44, "train/lr": 7.884159206979602e-05, "train/grad": 0.08724001364758309, "train/loss": 0.8217249787902832, "eval/hcp-train-subset/loss": 0.8329994236269305, "eval/hcp-val/loss": 0.8365542744436572, "eval/nsd-val/loss": 0.8193720848329605} +{"epoch": 45, "train/lr": 7.683818071130916e-05, "train/grad": 0.08803683168779416, "train/loss": 0.8191106197452546, "eval/hcp-train-subset/loss": 0.8345878614533332, "eval/hcp-val/loss": 0.8379920942168082, "eval/nsd-val/loss": 0.8078015215935246} +{"epoch": 46, "train/lr": 7.481909076272522e-05, "train/grad": 0.08826163520883223, "train/loss": 0.8212416188144683, "eval/hcp-train-subset/loss": 0.8336450371050066, "eval/hcp-val/loss": 0.8380480552873304, "eval/nsd-val/loss": 0.8102361992482217} +{"epoch": 47, "train/lr": 7.278653006925963e-05, "train/grad": 0.08935162730921573, "train/loss": 0.8198888573169708, "eval/hcp-train-subset/loss": 0.8313031792640686, "eval/hcp-val/loss": 0.8354651754902255, "eval/nsd-val/loss": 0.8095078843255197} +{"epoch": 48, "train/lr": 7.074272120618864e-05, "train/grad": 0.0902447698580297, "train/loss": 0.8197610415363312, "eval/hcp-train-subset/loss": 0.831763098316808, "eval/hcp-val/loss": 0.8364640628137896, "eval/nsd-val/loss": 0.8105722194717776} +{"epoch": 49, "train/lr": 6.868989904849677e-05, "train/grad": 0.09336505553037615, "train/loss": 0.8168129331493378, "eval/hcp-train-subset/loss": 0.831306254671466, "eval/hcp-val/loss": 0.8355099828012528, "eval/nsd-val/loss": 0.8105818321627956} +{"epoch": 50, "train/lr": 6.6630308327075e-05, "train/grad": 0.09256990788299045, "train/loss": 0.8187916173744202, "eval/hcp-train-subset/loss": 0.8322615152405154, "eval/hcp-val/loss": 0.8364039130749241, "eval/nsd-val/loss": 0.8111060011771417} +{"epoch": 51, "train/lr": 6.456620117413798e-05, "train/grad": 0.0948341737206782, "train/loss": 0.8173416146183013, "eval/hcp-train-subset/loss": 0.831268306701414, "eval/hcp-val/loss": 0.8354723232407724, "eval/nsd-val/loss": 0.8122565823216592} +{"epoch": 52, "train/lr": 6.249983466055255e-05, "train/grad": 0.09708388265069054, "train/loss": 0.8162657193851471, "eval/hcp-train-subset/loss": 0.8309815112621554, "eval/hcp-val/loss": 0.8344371789886106, "eval/nsd-val/loss": 0.8147299145498583} +{"epoch": 53, "train/lr": 6.0433468327763305e-05, "train/grad": 0.09679741528727086, "train/loss": 0.8171122093200683, "eval/hcp-train-subset/loss": 0.8314313119457614, "eval/hcp-val/loss": 0.8354701668985428, "eval/nsd-val/loss": 0.8137543489856105} +{"epoch": 54, "train/lr": 5.83693617170174e-05, "train/grad": 0.09937295477679918, "train/loss": 0.8134623267745972, "eval/hcp-train-subset/loss": 0.8297548544022345, "eval/hcp-val/loss": 0.8341416803098494, "eval/nsd-val/loss": 0.8123682968078121} +{"epoch": 55, "train/lr": 5.6309771898588165e-05, "train/grad": 0.09787767159594371, "train/loss": 0.8170859803581237, "eval/hcp-train-subset/loss": 0.8289686816353952, "eval/hcp-val/loss": 0.835041202845112, "eval/nsd-val/loss": 0.8110934630517037} +{"epoch": 56, "train/lr": 5.4256951003704155e-05, "train/grad": 0.10166593151572839, "train/loss": 0.8144594793033599, "eval/hcp-train-subset/loss": 0.8283877766901447, "eval/hcp-val/loss": 0.8356291420998112, "eval/nsd-val/loss": 0.8108816108395976} +{"epoch": 57, "train/lr": 5.221314376187425e-05, "train/grad": 0.10273214708104293, "train/loss": 0.8151640394496917, "eval/hcp-train-subset/loss": 0.8286586557665179, "eval/hcp-val/loss": 0.834459892203731, "eval/nsd-val/loss": 0.8103062375899284} +{"epoch": 58, "train/lr": 5.018058504631059e-05, "train/grad": 0.10383509647576289, "train/loss": 0.8140451854515076, "eval/hcp-train-subset/loss": 0.8290613937762475, "eval/hcp-val/loss": 0.8347364481418363, "eval/nsd-val/loss": 0.8118662584212518} +{"epoch": 59, "train/lr": 4.816149743012713e-05, "train/grad": 0.10310381836503416, "train/loss": 0.8147977389240265, "eval/hcp-train-subset/loss": 0.8288227742718112, "eval/hcp-val/loss": 0.8341990055576447, "eval/nsd-val/loss": 0.8089047574227856} +{"epoch": 60, "train/lr": 4.615808875598772e-05, "train/grad": 0.10505972772042244, "train/loss": 0.8141993965530395, "eval/hcp-train-subset/loss": 0.826528343462175, "eval/hcp-val/loss": 0.8339134502795434, "eval/nsd-val/loss": 0.8129749038527089} +{"epoch": 61, "train/lr": 4.417254972186445e-05, "train/grad": 0.10727064714257785, "train/loss": 0.8129022182464599, "eval/hcp-train-subset/loss": 0.8275363945191906, "eval/hcp-val/loss": 0.8335432391012868, "eval/nsd-val/loss": 0.8110426654738765} +{"epoch": 62, "train/lr": 4.220705148553925e-05, "train/grad": 0.10887750759136396, "train/loss": 0.8130522941112518, "eval/hcp-train-subset/loss": 0.825796167696676, "eval/hcp-val/loss": 0.8340700441791166, "eval/nsd-val/loss": 0.8112770088257328} +{"epoch": 63, "train/lr": 4.026374329047657e-05, "train/grad": 0.11257912595631142, "train/loss": 0.8107731063079834, "eval/hcp-train-subset/loss": 0.8258214362205998, "eval/hcp-val/loss": 0.8340536125244633, "eval/nsd-val/loss": 0.8117968863056552} +{"epoch": 64, "train/lr": 3.834475011565652e-05, "train/grad": 0.11390504615784416, "train/loss": 0.8116195034980774, "eval/hcp-train-subset/loss": 0.8241305947303772, "eval/hcp-val/loss": 0.8332197396985946, "eval/nsd-val/loss": 0.8105246203560983} +{"epoch": 65, "train/lr": 3.6452170351940815e-05, "train/grad": 0.11489132250287504, "train/loss": 0.8109449425125123, "eval/hcp-train-subset/loss": 0.8248595460768668, "eval/hcp-val/loss": 0.8331170533933947, "eval/nsd-val/loss": 0.8104138768488361} +{"epoch": 66, "train/lr": 3.458807350751516e-05, "train/grad": 0.116826064960981, "train/loss": 0.8099381205272674, "eval/hcp-train-subset/loss": 0.824276537664475, "eval/hcp-val/loss": 0.8332048241169222, "eval/nsd-val/loss": 0.8129540412656723} +{"epoch": 67, "train/lr": 3.2754497944910164e-05, "train/grad": 0.11797457789374985, "train/loss": 0.8107862046051025, "eval/hcp-train-subset/loss": 0.8246248124107238, "eval/hcp-val/loss": 0.8329746905834444, "eval/nsd-val/loss": 0.8136140742609578} +{"epoch": 68, "train/lr": 3.0953448652083367e-05, "train/grad": 0.11960476596276341, "train/loss": 0.8100435165405273, "eval/hcp-train-subset/loss": 0.8224684201901958, "eval/hcp-val/loss": 0.8324193021943492, "eval/nsd-val/loss": 0.8148154327946324} +{"epoch": 69, "train/lr": 2.9186895049993948e-05, "train/grad": 0.12035317278769907, "train/loss": 0.809746097831726, "eval/hcp-train-subset/loss": 0.8214985612900026, "eval/hcp-val/loss": 0.8321794579105992, "eval/nsd-val/loss": 0.8108530717511331} +{"epoch": 70, "train/lr": 2.7456768839068717e-05, "train/grad": 0.12438732930596216, "train/loss": 0.8062628432273865, "eval/hcp-train-subset/loss": 0.8213262875233928, "eval/hcp-val/loss": 0.832730459590112, "eval/nsd-val/loss": 0.8090148568153381} +{"epoch": 71, "train/lr": 2.5764961886919063e-05, "train/grad": 0.12344911771560524, "train/loss": 0.810247432937622, "eval/hcp-train-subset/loss": 0.8219206669638234, "eval/hcp-val/loss": 0.8324820091647487, "eval/nsd-val/loss": 0.8093601099906429} +{"epoch": 72, "train/lr": 2.411332415960724e-05, "train/grad": 0.12790856706562825, "train/loss": 0.8068524253940582, "eval/hcp-train-subset/loss": 0.8207617027144278, "eval/hcp-val/loss": 0.8323346876328991, "eval/nsd-val/loss": 0.8101262286786111} +{"epoch": 73, "train/lr": 2.2503661698739544e-05, "train/grad": 0.12745450467958971, "train/loss": 0.8093125702285766, "eval/hcp-train-subset/loss": 0.819473224301492, "eval/hcp-val/loss": 0.8317146157064745, "eval/nsd-val/loss": 0.8104250219560438} +{"epoch": 74, "train/lr": 2.0937734646583902e-05, "train/grad": 0.12880242877770148, "train/loss": 0.8098874076080322, "eval/hcp-train-subset/loss": 0.8173572046141471, "eval/hcp-val/loss": 0.8317561457234044, "eval/nsd-val/loss": 0.8115882546670975} +{"epoch": 75, "train/lr": 1.9417255321381202e-05, "train/grad": 0.12999701826199986, "train/loss": 0.8093910970211029, "eval/hcp-train-subset/loss": 0.8179018757035655, "eval/hcp-val/loss": 0.8319932068547895, "eval/nsd-val/loss": 0.8133965278825452} +{"epoch": 76, "train/lr": 1.7943886344950134e-05, "train/grad": 0.13267155501409583, "train/loss": 0.8085323287963867, "eval/hcp-train-subset/loss": 0.8169505086637312, "eval/hcp-val/loss": 0.8305025591004279, "eval/nsd-val/loss": 0.8110345207875774} +{"epoch": 77, "train/lr": 1.651923882463461e-05, "train/grad": 0.1348378394757816, "train/loss": 0.8064895512199401, "eval/hcp-train-subset/loss": 0.8171956356494657, "eval/hcp-val/loss": 0.8312889952813426, "eval/nsd-val/loss": 0.8137582675103219} +{"epoch": 78, "train/lr": 1.5144870591581508e-05, "train/grad": 0.13462504441115652, "train/loss": 0.8084500450897216, "eval/hcp-train-subset/loss": 0.8168051531237941, "eval/hcp-val/loss": 0.8306057222427861, "eval/nsd-val/loss": 0.8118332336025853} +{"epoch": 79, "train/lr": 1.3822284497275662e-05, "train/grad": 0.13922055952020773, "train/loss": 0.8064383937072754, "eval/hcp-train-subset/loss": 0.8156357032637442, "eval/hcp-val/loss": 0.830629107452208, "eval/nsd-val/loss": 0.8111505287308847} +{"epoch": 80, "train/lr": 1.2552926770192975e-05, "train/grad": 0.1382033396655367, "train/loss": 0.8089218574333191, "eval/hcp-train-subset/loss": 0.816051923459576, "eval/hcp-val/loss": 0.8305449956847776, "eval/nsd-val/loss": 0.8093745487351571} +{"epoch": 81, "train/lr": 1.1338185434371453e-05, "train/grad": 0.13852376007879208, "train/loss": 0.8083270741653442, "eval/hcp-train-subset/loss": 0.8154205968303065, "eval/hcp-val/loss": 0.830771904799246, "eval/nsd-val/loss": 0.8149221106883018} +{"epoch": 82, "train/lr": 1.0179388791627326e-05, "train/grad": 0.14054327484262605, "train/loss": 0.8088435851860046, "eval/hcp-train-subset/loss": 0.8146794890203783, "eval/hcp-val/loss": 0.8312042759310815, "eval/nsd-val/loss": 0.8182526432698772} +{"epoch": 83, "train/lr": 9.07780396907607e-06, "train/grad": 0.1436469578555464, "train/loss": 0.8060136544418335, "eval/hcp-train-subset/loss": 0.8141555997633165, "eval/hcp-val/loss": 0.8306109914856572, "eval/nsd-val/loss": 0.8158872521692707} +{"epoch": 84, "train/lr": 8.034635533547902e-06, "train/grad": 0.14388307722740915, "train/loss": 0.8087169422531127, "eval/hcp-train-subset/loss": 0.8128796248666702, "eval/hcp-val/loss": 0.8300130780666105, "eval/nsd-val/loss": 0.812794073935478} +{"epoch": 85, "train/lr": 7.051024174411275e-06, "train/grad": 0.14195948125367394, "train/loss": 0.8125374702644348, "eval/hcp-train-subset/loss": 0.8124905574706293, "eval/hcp-val/loss": 0.830326787887081, "eval/nsd-val/loss": 0.8151383669145645} +{"epoch": 86, "train/lr": 6.1280454562463606e-06, "train/grad": 0.143398149538626, "train/loss": 0.8101493040943146, "eval/hcp-train-subset/loss": 0.8116148662182593, "eval/hcp-val/loss": 0.8298188553702447, "eval/nsd-val/loss": 0.813730651332486} +{"epoch": 87, "train/lr": 5.266708642730326e-06, "train/grad": 0.14725383508726286, "train/loss": 0.8094664994812012, "eval/hcp-train-subset/loss": 0.8112014753203238, "eval/hcp-val/loss": 0.8291778429861991, "eval/nsd-val/loss": 0.8130138612562611} +{"epoch": 88, "train/lr": 4.467955593022733e-06, "train/grad": 0.15027544136517368, "train/loss": 0.807799443731308, "eval/hcp-train-subset/loss": 0.8102446871419107, "eval/hcp-val/loss": 0.8294154290230044, "eval/nsd-val/loss": 0.8134184325895002} +{"epoch": 89, "train/lr": 3.732659731856291e-06, "train/grad": 0.1463563738846554, "train/loss": 0.8115738939380646, "eval/hcp-train-subset/loss": 0.8096299286811582, "eval/hcp-val/loss": 0.8293793960925071, "eval/nsd-val/loss": 0.8136986792087555} +{"epoch": 90, "train/lr": 3.0616250944596583e-06, "train/grad": 0.15242992085930213, "train/loss": 0.8059433866119384, "eval/hcp-train-subset/loss": 0.8087305532347772, "eval/hcp-val/loss": 0.8299131528023751, "eval/nsd-val/loss": 0.8127895286006313} +{"epoch": 91, "train/lr": 2.4555854473568305e-06, "train/grad": 0.15139697710022615, "train/loss": 0.8085190845870972, "eval/hcp-train-subset/loss": 0.8080996380698297, "eval/hcp-val/loss": 0.8290733925757869, "eval/nsd-val/loss": 0.8131830999928136} +{"epoch": 92, "train/lr": 1.915203486004091e-06, "train/grad": 0.15370451528626697, "train/loss": 0.8085740648555756, "eval/hcp-train-subset/loss": 0.8082502715049251, "eval/hcp-val/loss": 0.8294073026026448, "eval/nsd-val/loss": 0.8125187023993461} +{"epoch": 93, "train/lr": 1.4410701101423926e-06, "train/grad": 0.1568943358642954, "train/loss": 0.8091572137451172, "eval/hcp-train-subset/loss": 0.8074376160098661, "eval/hcp-val/loss": 0.8291257100720559, "eval/nsd-val/loss": 0.8116801679134369} +{"epoch": 94, "train/lr": 1.0337037776570775e-06, "train/grad": 0.15399877164899234, "train/loss": 0.8103153504371643, "eval/hcp-train-subset/loss": 0.8074586449130889, "eval/hcp-val/loss": 0.8292982684027764, "eval/nsd-val/loss": 0.8118645577661453} +{"epoch": 95, "train/lr": 6.935499376518293e-07, "train/grad": 0.1538534771972453, "train/loss": 0.8127805411911011, "eval/hcp-train-subset/loss": 0.8069069068278035, "eval/hcp-val/loss": 0.8283277086673244, "eval/nsd-val/loss": 0.8130025690601718} +{"epoch": 96, "train/lr": 4.209805433566085e-07, "train/grad": 0.15906587372344375, "train/loss": 0.8096346040821075, "eval/hcp-train-subset/loss": 0.80655221881405, "eval/hcp-val/loss": 0.8285974792895778, "eval/nsd-val/loss": 0.8118682993996528} +{"epoch": 97, "train/lr": 2.1629364540224422e-07, "train/grad": 0.16038455040202593, "train/loss": 0.8102071796417236, "eval/hcp-train-subset/loss": 0.806275604232665, "eval/hcp-val/loss": 0.8284542531736435, "eval/nsd-val/loss": 0.8118691396328711} +{"epoch": 98, "train/lr": 7.971306590647406e-08, "train/grad": 0.16063204632986444, "train/loss": 0.8097754142856598, "eval/hcp-train-subset/loss": 0.8066235463465413, "eval/hcp-val/loss": 0.8284778239265564, "eval/nsd-val/loss": 0.8118052992128557} +{"epoch": 99, "train/lr": 1.1388153727718725e-08, "train/grad": 0.16085737398807698, "train/loss": 0.8121749060249328, "eval/hcp-train-subset/loss": 0.8061634206002758, "eval/hcp-val/loss": 0.828370972025779, "eval/nsd-val/loss": 0.8116522191032287} diff --git a/data_scaling/n1600_1/pretrain/log.txt b/data_scaling/n1600_1/pretrain/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..108ea28b8e0334d40b266328f16c55edf2d692aa --- /dev/null +++ b/data_scaling/n1600_1/pretrain/log.txt @@ -0,0 +1,8272 @@ +pretraining fmri mae +start: 2026-01-17 20:35:33 +cwd: /admin/home/connor/fmri-fm +sha: 4c3ccfb0b63e4f01e9758042b5299530a6d93949, status: has uncommitted changes, branch: dev/clane9 +config: +name: data_scaling/n1600_1/pretrain +notes: data scaling experiment n1600_1 (seed=1644) +output_dir: experiments/data_scaling/output/data_scaling/n1600_1/pretrain +input_space: flat +patch_size: 16 +num_frames: 16 +t_patch_size: 4 +mask_ratio: 0.9 +pred_mask_ratio: null +masking: tube +masking_kwargs: {} +mask_patch_size: null +model: mae_vit_base +model_kwargs: + decoding: attn + pos_embed: sep + target_norm: null + pca_norm_nc: 2 + t_pred_stride: 2 + no_decode_pos: true + mask_drop_scale: false + pred_edge_pad: 0 + gauss_sigma: null + class_token: true + reg_tokens: 0 + no_embed_class: true + head_init_scale: 0.0 + decoder_depth: 4 + drop_path_rate: 0.0 +datasets: + hcp-train: + type: wds + url: /data/fmri-datasets/pretrain/hcpya-all.flat.wds/hcpya-all-flat-{00000..01599}.tar + clipping: random + clipping_kwargs: + oversample: 4.0 + shuffle: true + buffer_size: 2000 + samples_per_epoch: 200000 + hcp-train-subset: + type: arrow + root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/train + split_range: + - 0 + - 2000 + shuffle: false + hcp-val: + type: arrow + root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/test + split_range: + - 0 + - 2000 + shuffle: false + nsd-val: + type: arrow + root: s3://medarc/fmri-datasets/eval/nsd-cococlip.${input_space}.arrow/testid + split_range: + - 0 + - 2000 + shuffle: false +train_dataset: hcp-train +eval_datasets: +- hcp-train-subset +- hcp-val +- nsd-val +val_dataset: hcp-val +clip_vmax: 3.0 +normalize: frame +tr_scale: null +crop_scale: null +crop_aspect: null +gray_jitter: null +num_workers: 16 +epochs: 100 +batch_size: 32 +accum_iter: 1 +base_lr: 0.001 +min_lr: 0.0 +warmup_epochs: 5 +weight_decay: 0.05 +betas: +- 0.9 +- 0.95 +clip_grad: 1.0 +amp: true +amp_dtype: float16 +ckpt: null +resume: true +auto_resume: true +start_epoch: 0 +max_checkpoints: 20 +checkpoint_period: 5 +plot_period: 5 +device: cuda +presend_cuda: false +seed: 1644 +debug: false +wandb: true +wandb_entity: null +wandb_project: fMRI-foundation-model +rank: 0 +world_size: 1 +gpu: 0 +distributed: true +dist_backend: nccl +in_chans: 1 +img_size: +- 224 +- 560 + +train transform: +Compose( + ToTensor() + TemporalCenterCrop(num_frames=16) + Normalize(mode='frame') + Clip(vmax=3.0) + FlatUnmask((224, 560)) +) +val transform: +Compose( + ToTensor() + TemporalCenterCrop(num_frames=16) + Normalize(mode='frame') + Clip(vmax=3.0) + FlatUnmask((224, 560)) +) +mask generator: +TubeMasking( + mask_ratio=0.9 + (patchify): Patchify2D((224, 560), (16, 16), in_chans=1) +) +loading dataset: hcp-train + +type: wds +url: /data/fmri-datasets/pretrain/hcpya-all.flat.wds/hcpya-all-flat-{00000..01599}.tar +clipping: random +clipping_kwargs: + oversample: 4.0 +shuffle: true +buffer_size: 2000 +samples_per_epoch: 200000 + +loading dataset: hcp-train-subset + +type: arrow +root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/train +split_range: +- 0 +- 2000 +shuffle: false + +split indices: [8543, 6917, 6772, 3955, 6165, 1554, 1082, 5811, 6919, 3150] +loading dataset: hcp-val + +type: arrow +root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/test +split_range: +- 0 +- 2000 +shuffle: false + +split indices: [1075, 1189, 738, 1350, 965, 1964, 1367, 1183, 1619, 1407] +loading dataset: nsd-val + +type: arrow +root: s3://medarc/fmri-datasets/eval/nsd-cococlip.${input_space}.arrow/testid +split_range: +- 0 +- 2000 +shuffle: false + +split indices: [1493, 4276, 245, 3092, 3905, 1862, 2362, 4411, 1138, 2824] +model: +MaskedAutoencoderViT( + decoding=attn, t_pred_stride=2, pred_edge_pad=0, no_decode_pos=True + (encoder): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) + (pred_patchify): StridedPatchify3D((16, 224, 560), (2, 16, 16), in_chans=1, t_stride=2) + (decoder): MaskedDecoder( + cross_decode=False, class_token=True, no_embed_class=True + (pos_embed): SeparablePosEmbed(512, (4, 14, 35)) + (proj): Linear(in_features=768, out_features=512, bias=True) + (blocks): ModuleList( + (0-3): 4 x Block( + (norm1): LayerNorm((512,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=16 + (q): Linear(in_features=512, out_features=512, bias=True) + (k): Linear(in_features=512, out_features=512, bias=True) + (v): Linear(in_features=512, out_features=512, bias=True) + (proj): Linear(in_features=512, out_features=512, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((512,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=512, out_features=2048, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=2048, out_features=512, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True) + (head): Linear(in_features=512, out_features=512, bias=True) + ) +) +num params: 99.7M +total batch size: 32 = 32 bs per gpu x 1 accum x 1 gpus +lr: 1.25e-04 = 1.00e-03 x 32 / 256 +full schedule: epochs = 100 (steps = 625000) +warmup: epochs = 5 (steps = 31250) +start training for 100 epochs +Train: [0] [ 0/6250] eta: 11:52:30 lr: 0.000000 grad: 0.0152 (0.0152) loss: 0.9961 (0.9961) time: 6.8401 data: 5.5384 max mem: 8570 +Train: [0] [ 100/6250] eta: 0:18:35 lr: 0.000000 grad: 0.0128 (0.0144) loss: 0.9956 (0.9957) time: 0.1354 data: 0.0619 max mem: 9377 +Train: [0] [ 200/6250] eta: 0:15:53 lr: 0.000001 grad: 0.0138 (0.0140) loss: 0.9956 (0.9958) time: 0.1454 data: 0.0647 max mem: 9377 +Train: [0] [ 300/6250] eta: 0:14:43 lr: 0.000001 grad: 0.0131 (0.0137) loss: 0.9959 (0.9958) time: 0.1125 data: 0.0288 max mem: 9377 +Train: [0] [ 400/6250] eta: 0:14:27 lr: 0.000002 grad: 0.0127 (0.0135) loss: 0.9957 (0.9958) time: 0.1488 data: 0.0553 max mem: 9377 +Train: [0] [ 500/6250] eta: 0:14:09 lr: 0.000002 grad: 0.0126 (0.0134) loss: 0.9955 (0.9958) time: 0.1592 data: 0.0665 max mem: 9377 +Train: [0] [ 600/6250] eta: 0:13:52 lr: 0.000002 grad: 0.0127 (0.0133) loss: 0.9951 (0.9958) time: 0.1617 data: 0.0739 max mem: 9377 +Train: [0] [ 700/6250] eta: 0:13:36 lr: 0.000003 grad: 0.0127 (0.0132) loss: 0.9956 (0.9958) time: 0.1520 data: 0.0646 max mem: 9377 +Train: [0] [ 800/6250] eta: 0:13:19 lr: 0.000003 grad: 0.0129 (0.0132) loss: 0.9954 (0.9958) time: 0.1475 data: 0.0622 max mem: 9377 +Train: [0] [ 900/6250] eta: 0:13:00 lr: 0.000004 grad: 0.0129 (0.0132) loss: 0.9956 (0.9957) time: 0.1490 data: 0.0625 max mem: 9377 +Train: [0] [1000/6250] eta: 0:12:41 lr: 0.000004 grad: 0.0133 (0.0132) loss: 0.9957 (0.9958) time: 0.1336 data: 0.0527 max mem: 9377 +Train: [0] [1100/6250] eta: 0:12:22 lr: 0.000004 grad: 0.0141 (0.0133) loss: 0.9953 (0.9958) time: 0.1357 data: 0.0457 max mem: 9377 +Train: [0] [1200/6250] eta: 0:12:05 lr: 0.000005 grad: 0.0159 (0.0134) loss: 0.9959 (0.9958) time: 0.1415 data: 0.0553 max mem: 9377 +Train: [0] [1300/6250] eta: 0:11:46 lr: 0.000005 grad: 0.0170 (0.0137) loss: 0.9957 (0.9958) time: 0.1547 data: 0.0742 max mem: 9377 +Train: [0] [1400/6250] eta: 0:11:26 lr: 0.000006 grad: 0.0215 (0.0142) loss: 0.9959 (0.9958) time: 0.1358 data: 0.0505 max mem: 9377 +Train: [0] [1500/6250] eta: 0:11:09 lr: 0.000006 grad: 0.0253 (0.0149) loss: 0.9953 (0.9958) time: 0.1267 data: 0.0475 max mem: 9377 +Train: [0] [1600/6250] eta: 0:10:52 lr: 0.000006 grad: 0.0288 (0.0158) loss: 0.9956 (0.9958) time: 0.0955 data: 0.0082 max mem: 9377 +Train: [0] [1700/6250] eta: 0:10:35 lr: 0.000007 grad: 0.0360 (0.0169) loss: 0.9950 (0.9957) time: 0.1101 data: 0.0276 max mem: 9377 +Train: [0] [1800/6250] eta: 0:10:18 lr: 0.000007 grad: 0.0406 (0.0184) loss: 0.9947 (0.9957) time: 0.1168 data: 0.0313 max mem: 9377 +Train: [0] [1900/6250] eta: 0:10:01 lr: 0.000008 grad: 0.0436 (0.0200) loss: 0.9942 (0.9957) time: 0.1326 data: 0.0510 max mem: 9377 +Train: [0] [2000/6250] eta: 0:09:45 lr: 0.000008 grad: 0.0537 (0.0214) loss: 0.9952 (0.9956) time: 0.1208 data: 0.0360 max mem: 9377 +Train: [0] [2100/6250] eta: 0:09:31 lr: 0.000008 grad: 0.0444 (0.0226) loss: 0.9945 (0.9956) time: 0.1462 data: 0.0628 max mem: 9377 +Train: [0] [2200/6250] eta: 0:09:14 lr: 0.000009 grad: 0.0408 (0.0237) loss: 0.9946 (0.9956) time: 0.1144 data: 0.0272 max mem: 9377 +Train: [0] [2300/6250] eta: 0:08:58 lr: 0.000009 grad: 0.0431 (0.0250) loss: 0.9941 (0.9955) time: 0.1146 data: 0.0249 max mem: 9377 +Train: [0] [2400/6250] eta: 0:08:43 lr: 0.000010 grad: 0.0616 (0.0260) loss: 0.9940 (0.9955) time: 0.1359 data: 0.0550 max mem: 9377 +Train: [0] [2500/6250] eta: 0:08:28 lr: 0.000010 grad: 0.0432 (0.0268) loss: 0.9941 (0.9954) time: 0.1130 data: 0.0124 max mem: 9377 +Train: [0] [2600/6250] eta: 0:08:13 lr: 0.000010 grad: 0.0439 (0.0278) loss: 0.9945 (0.9954) time: 0.1167 data: 0.0290 max mem: 9377 +Train: [0] [2700/6250] eta: 0:07:59 lr: 0.000011 grad: 0.0508 (0.0288) loss: 0.9934 (0.9953) time: 0.1190 data: 0.0370 max mem: 9377 +Train: [0] [2800/6250] eta: 0:07:44 lr: 0.000011 grad: 0.0468 (0.0298) loss: 0.9938 (0.9953) time: 0.1295 data: 0.0408 max mem: 9377 +Train: [0] [2900/6250] eta: 0:07:30 lr: 0.000012 grad: 0.0399 (0.0307) loss: 0.9943 (0.9952) time: 0.1265 data: 0.0439 max mem: 9377 +Train: [0] [3000/6250] eta: 0:07:16 lr: 0.000012 grad: 0.0612 (0.0321) loss: 0.9942 (0.9952) time: 0.1282 data: 0.0436 max mem: 9377 +Train: [0] [3100/6250] eta: 0:07:02 lr: 0.000012 grad: 0.0479 (0.0331) loss: 0.9931 (0.9951) time: 0.1140 data: 0.0290 max mem: 9377 +Train: [0] [3200/6250] eta: 0:06:48 lr: 0.000013 grad: 0.0660 (0.0342) loss: 0.9929 (0.9951) time: 0.1336 data: 0.0501 max mem: 9377 +Train: [0] [3300/6250] eta: 0:06:35 lr: 0.000013 grad: 0.0680 (0.0352) loss: 0.9929 (0.9950) time: 0.1375 data: 0.0471 max mem: 9377 +Train: [0] [3400/6250] eta: 0:06:21 lr: 0.000014 grad: 0.0767 (0.0362) loss: 0.9921 (0.9949) time: 0.1370 data: 0.0426 max mem: 9377 +Train: [0] [3500/6250] eta: 0:06:08 lr: 0.000014 grad: 0.0592 (0.0371) loss: 0.9930 (0.9949) time: 0.1308 data: 0.0366 max mem: 9377 +Train: [0] [3600/6250] eta: 0:05:55 lr: 0.000014 grad: 0.0656 (0.0380) loss: 0.9932 (0.9948) time: 0.1426 data: 0.0412 max mem: 9377 +Train: [0] [3700/6250] eta: 0:05:42 lr: 0.000015 grad: 0.0591 (0.0388) loss: 0.9920 (0.9948) time: 0.1291 data: 0.0492 max mem: 9377 +Train: [0] [3800/6250] eta: 0:05:28 lr: 0.000015 grad: 0.0604 (0.0397) loss: 0.9925 (0.9947) time: 0.1381 data: 0.0490 max mem: 9377 +Train: [0] [3900/6250] eta: 0:05:15 lr: 0.000016 grad: 0.0614 (0.0404) loss: 0.9920 (0.9946) time: 0.1467 data: 0.0574 max mem: 9377 +Train: [0] [4000/6250] eta: 0:05:02 lr: 0.000016 grad: 0.0641 (0.0413) loss: 0.9912 (0.9946) time: 0.1613 data: 0.0608 max mem: 9377 +Train: [0] [4100/6250] eta: 0:04:49 lr: 0.000016 grad: 0.0737 (0.0421) loss: 0.9907 (0.9945) time: 0.1461 data: 0.0578 max mem: 9377 +Train: [0] [4200/6250] eta: 0:04:36 lr: 0.000017 grad: 0.0766 (0.0430) loss: 0.9908 (0.9944) time: 0.1255 data: 0.0334 max mem: 9377 +Train: [0] [4300/6250] eta: 0:04:23 lr: 0.000017 grad: 0.0747 (0.0438) loss: 0.9917 (0.9943) time: 0.1491 data: 0.0670 max mem: 9377 +Train: [0] [4400/6250] eta: 0:04:09 lr: 0.000018 grad: 0.0855 (0.0446) loss: 0.9903 (0.9943) time: 0.1343 data: 0.0482 max mem: 9377 +Train: [0] [4500/6250] eta: 0:03:56 lr: 0.000018 grad: 0.0703 (0.0453) loss: 0.9906 (0.9942) time: 0.1197 data: 0.0369 max mem: 9377 +Train: [0] [4600/6250] eta: 0:03:42 lr: 0.000018 grad: 0.0798 (0.0460) loss: 0.9909 (0.9941) time: 0.1181 data: 0.0308 max mem: 9377 +Train: [0] [4700/6250] eta: 0:03:29 lr: 0.000019 grad: 0.0724 (0.0468) loss: 0.9913 (0.9940) time: 0.1366 data: 0.0528 max mem: 9377 +Train: [0] [4800/6250] eta: 0:03:16 lr: 0.000019 grad: 0.0772 (0.0476) loss: 0.9915 (0.9940) time: 0.1449 data: 0.0561 max mem: 9377 +Train: [0] [4900/6250] eta: 0:03:02 lr: 0.000020 grad: 0.0756 (0.0484) loss: 0.9897 (0.9939) time: 0.1452 data: 0.0583 max mem: 9377 +Train: [0] [5000/6250] eta: 0:02:49 lr: 0.000020 grad: 0.0795 (0.0490) loss: 0.9914 (0.9938) time: 0.1333 data: 0.0480 max mem: 9377 +Train: [0] [5100/6250] eta: 0:02:35 lr: 0.000020 grad: 0.0775 (0.0497) loss: 0.9916 (0.9938) time: 0.1633 data: 0.0798 max mem: 9377 +Train: [0] [5200/6250] eta: 0:02:22 lr: 0.000021 grad: 0.0729 (0.0503) loss: 0.9895 (0.9937) time: 0.1399 data: 0.0492 max mem: 9377 +Train: [0] [5300/6250] eta: 0:02:08 lr: 0.000021 grad: 0.0797 (0.0509) loss: 0.9894 (0.9936) time: 0.1223 data: 0.0413 max mem: 9377 +Train: [0] [5400/6250] eta: 0:01:55 lr: 0.000022 grad: 0.0875 (0.0515) loss: 0.9893 (0.9936) time: 0.1534 data: 0.0720 max mem: 9377 +Train: [0] [5500/6250] eta: 0:01:41 lr: 0.000022 grad: 0.0691 (0.0521) loss: 0.9896 (0.9935) time: 0.1479 data: 0.0622 max mem: 9377 +Train: [0] [5600/6250] eta: 0:01:28 lr: 0.000022 grad: 0.0752 (0.0527) loss: 0.9900 (0.9934) time: 0.1430 data: 0.0601 max mem: 9377 +Train: [0] [5700/6250] eta: 0:01:14 lr: 0.000023 grad: 0.0737 (0.0532) loss: 0.9906 (0.9934) time: 0.1324 data: 0.0476 max mem: 9377 +Train: [0] [5800/6250] eta: 0:01:01 lr: 0.000023 grad: 0.0794 (0.0536) loss: 0.9898 (0.9933) time: 0.1320 data: 0.0498 max mem: 9377 +Train: [0] [5900/6250] eta: 0:00:47 lr: 0.000024 grad: 0.0665 (0.0541) loss: 0.9912 (0.9933) time: 0.1139 data: 0.0323 max mem: 9377 +Train: [0] [6000/6250] eta: 0:00:33 lr: 0.000024 grad: 0.0726 (0.0544) loss: 0.9910 (0.9932) time: 0.1418 data: 0.0548 max mem: 9377 +Train: [0] [6100/6250] eta: 0:00:20 lr: 0.000024 grad: 0.0761 (0.0549) loss: 0.9896 (0.9932) time: 0.1325 data: 0.0470 max mem: 9377 +Train: [0] [6200/6250] eta: 0:00:06 lr: 0.000025 grad: 0.0787 (0.0552) loss: 0.9902 (0.9931) time: 0.1422 data: 0.0565 max mem: 9377 +Train: [0] [6249/6250] eta: 0:00:00 lr: 0.000025 grad: 0.0794 (0.0555) loss: 0.9904 (0.9931) time: 0.1528 data: 0.0615 max mem: 9377 +Train: [0] Total time: 0:14:13 (0.1365 s / it) +Averaged stats: lr: 0.000025 grad: 0.0794 (0.0555) loss: 0.9904 (0.9931) +Eval (hcp-train-subset): [0] [ 0/62] eta: 0:03:21 loss: 0.9890 (0.9890) time: 3.2445 data: 3.1203 max mem: 9377 +Eval (hcp-train-subset): [0] [61/62] eta: 0:00:00 loss: 0.9890 (0.9889) time: 0.1252 data: 0.1001 max mem: 9377 +Eval (hcp-train-subset): [0] Total time: 0:00:14 (0.2291 s / it) +Averaged stats (hcp-train-subset): loss: 0.9890 (0.9889) +Eval (hcp-val): [0] [ 0/62] eta: 0:02:49 loss: 0.9843 (0.9843) time: 2.7353 data: 2.6762 max mem: 9377 +Eval (hcp-val): [0] [61/62] eta: 0:00:00 loss: 0.9900 (0.9895) time: 0.1290 data: 0.1025 max mem: 9377 +Eval (hcp-val): [0] Total time: 0:00:12 (0.2078 s / it) +Averaged stats (hcp-val): loss: 0.9900 (0.9895) +Eval (nsd-val): [0] [ 0/62] eta: 0:04:16 loss: 0.9872 (0.9872) time: 4.1394 data: 4.1054 max mem: 9377 +Eval (nsd-val): [0] [61/62] eta: 0:00:00 loss: 0.9893 (0.9889) time: 0.1159 data: 0.0908 max mem: 9377 +Eval (nsd-val): [0] Total time: 0:00:12 (0.2088 s / it) +Averaged stats (nsd-val): loss: 0.9893 (0.9889) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +Train: [1] [ 0/6250] eta: 7:30:09 lr: 0.000025 grad: 0.1133 (0.1133) loss: 0.9914 (0.9914) time: 4.3215 data: 4.0851 max mem: 9377 +Train: [1] [ 100/6250] eta: 0:19:07 lr: 0.000025 grad: 0.0605 (0.0789) loss: 0.9905 (0.9898) time: 0.1348 data: 0.0527 max mem: 9377 +Train: [1] [ 200/6250] eta: 0:17:31 lr: 0.000026 grad: 0.0824 (0.0824) loss: 0.9899 (0.9893) time: 0.1453 data: 0.0634 max mem: 9377 +Train: [1] [ 300/6250] eta: 0:16:03 lr: 0.000026 grad: 0.0588 (0.0836) loss: 0.9913 (0.9892) time: 0.1512 data: 0.0649 max mem: 9377 +Train: [1] [ 400/6250] eta: 0:15:28 lr: 0.000027 grad: 0.0692 (0.0836) loss: 0.9899 (0.9890) time: 0.1446 data: 0.0590 max mem: 9377 +Train: [1] [ 500/6250] eta: 0:15:01 lr: 0.000027 grad: 0.0674 (0.0829) loss: 0.9902 (0.9890) time: 0.1506 data: 0.0610 max mem: 9377 +Train: [1] [ 600/6250] eta: 0:14:27 lr: 0.000027 grad: 0.0743 (0.0832) loss: 0.9897 (0.9890) time: 0.1329 data: 0.0358 max mem: 9377 +Train: [1] [ 700/6250] eta: 0:13:54 lr: 0.000028 grad: 0.0783 (0.0830) loss: 0.9900 (0.9889) time: 0.1370 data: 0.0498 max mem: 9377 +Train: [1] [ 800/6250] eta: 0:13:32 lr: 0.000028 grad: 0.0875 (0.0831) loss: 0.9895 (0.9889) time: 0.1576 data: 0.0760 max mem: 9377 +Train: [1] [ 900/6250] eta: 0:13:56 lr: 0.000029 grad: 0.0745 (0.0832) loss: 0.9880 (0.9889) time: 0.3318 data: 0.2530 max mem: 9377 +Train: [1] [1000/6250] eta: 0:13:57 lr: 0.000029 grad: 0.0741 (0.0835) loss: 0.9898 (0.9889) time: 0.2106 data: 0.1326 max mem: 9377 +Train: [1] [1100/6250] eta: 0:14:08 lr: 0.000029 grad: 0.0724 (0.0834) loss: 0.9900 (0.9888) time: 0.2374 data: 0.1450 max mem: 9377 +Train: [1] [1200/6250] eta: 0:14:03 lr: 0.000030 grad: 0.0707 (0.0831) loss: 0.9888 (0.9888) time: 0.1871 data: 0.0875 max mem: 9377 +Train: [1] [1300/6250] eta: 0:13:45 lr: 0.000030 grad: 0.0772 (0.0828) loss: 0.9890 (0.9888) time: 0.1546 data: 0.0540 max mem: 9377 +Train: [1] [1400/6250] eta: 0:13:26 lr: 0.000031 grad: 0.0724 (0.0832) loss: 0.9886 (0.9888) time: 0.1598 data: 0.0728 max mem: 9377 +Train: [1] [1500/6250] eta: 0:13:08 lr: 0.000031 grad: 0.0717 (0.0828) loss: 0.9900 (0.9888) time: 0.1400 data: 0.0546 max mem: 9377 +Train: [1] [1600/6250] eta: 0:12:46 lr: 0.000031 grad: 0.0792 (0.0825) loss: 0.9889 (0.9888) time: 0.1590 data: 0.0761 max mem: 9377 +Train: [1] [1700/6250] eta: 0:12:28 lr: 0.000032 grad: 0.0773 (0.0822) loss: 0.9888 (0.9888) time: 0.1428 data: 0.0640 max mem: 9377 +Train: [1] [1800/6250] eta: 0:12:08 lr: 0.000032 grad: 0.0747 (0.0821) loss: 0.9878 (0.9888) time: 0.1660 data: 0.0874 max mem: 9377 +Train: [1] [1900/6250] eta: 0:11:49 lr: 0.000033 grad: 0.0805 (0.0821) loss: 0.9884 (0.9888) time: 0.1539 data: 0.0708 max mem: 9377 +Train: [1] [2000/6250] eta: 0:11:30 lr: 0.000033 grad: 0.0752 (0.0819) loss: 0.9879 (0.9888) time: 0.1349 data: 0.0480 max mem: 9377 +Train: [1] [2100/6250] eta: 0:11:12 lr: 0.000033 grad: 0.0862 (0.0817) loss: 0.9878 (0.9888) time: 0.1469 data: 0.0628 max mem: 9377 +Train: [1] [2200/6250] eta: 0:10:53 lr: 0.000034 grad: 0.0742 (0.0817) loss: 0.9900 (0.9888) time: 0.1416 data: 0.0584 max mem: 9377 +Train: [1] [2300/6250] eta: 0:10:36 lr: 0.000034 grad: 0.0729 (0.0814) loss: 0.9893 (0.9888) time: 0.1389 data: 0.0507 max mem: 9377 +Train: [1] [2400/6250] eta: 0:10:17 lr: 0.000035 grad: 0.0710 (0.0813) loss: 0.9884 (0.9888) time: 0.1619 data: 0.0764 max mem: 9377 +Train: [1] [2500/6250] eta: 0:09:59 lr: 0.000035 grad: 0.0654 (0.0810) loss: 0.9892 (0.9888) time: 0.1400 data: 0.0446 max mem: 9377 +Train: [1] [2600/6250] eta: 0:09:40 lr: 0.000035 grad: 0.0727 (0.0809) loss: 0.9893 (0.9888) time: 0.1249 data: 0.0421 max mem: 9377 +Train: [1] [2700/6250] eta: 0:09:22 lr: 0.000036 grad: 0.0654 (0.0807) loss: 0.9902 (0.9889) time: 0.1393 data: 0.0535 max mem: 9377 +Train: [1] [2800/6250] eta: 0:09:06 lr: 0.000036 grad: 0.0754 (0.0806) loss: 0.9883 (0.9888) time: 0.1500 data: 0.0716 max mem: 9377 +Train: [1] [2900/6250] eta: 0:08:48 lr: 0.000037 grad: 0.0707 (0.0804) loss: 0.9893 (0.9889) time: 0.1316 data: 0.0503 max mem: 9377 +Train: [1] [3000/6250] eta: 0:08:31 lr: 0.000037 grad: 0.0647 (0.0801) loss: 0.9888 (0.9889) time: 0.1320 data: 0.0278 max mem: 9377 +Train: [1] [3100/6250] eta: 0:08:14 lr: 0.000037 grad: 0.0680 (0.0799) loss: 0.9888 (0.9889) time: 0.1475 data: 0.0621 max mem: 9377 +Train: [1] [3200/6250] eta: 0:07:56 lr: 0.000038 grad: 0.0757 (0.0798) loss: 0.9888 (0.9889) time: 0.1417 data: 0.0602 max mem: 9377 +Train: [1] [3300/6250] eta: 0:07:40 lr: 0.000038 grad: 0.0775 (0.0798) loss: 0.9880 (0.9888) time: 0.1384 data: 0.0610 max mem: 9377 +Train: [1] [3400/6250] eta: 0:07:23 lr: 0.000039 grad: 0.0733 (0.0798) loss: 0.9885 (0.9888) time: 0.1309 data: 0.0528 max mem: 9377 +Train: [1] [3500/6250] eta: 0:07:07 lr: 0.000039 grad: 0.0776 (0.0798) loss: 0.9879 (0.9888) time: 0.1594 data: 0.0751 max mem: 9377 +Train: [1] [3600/6250] eta: 0:06:50 lr: 0.000039 grad: 0.0775 (0.0798) loss: 0.9878 (0.9888) time: 0.1171 data: 0.0336 max mem: 9377 +Train: [1] [3700/6250] eta: 0:06:35 lr: 0.000040 grad: 0.0677 (0.0796) loss: 0.9876 (0.9888) time: 0.1859 data: 0.1036 max mem: 9377 +Train: [1] [3800/6250] eta: 0:06:20 lr: 0.000040 grad: 0.0711 (0.0796) loss: 0.9873 (0.9887) time: 0.1684 data: 0.0832 max mem: 9377 +Train: [1] [3900/6250] eta: 0:06:05 lr: 0.000041 grad: 0.0765 (0.0795) loss: 0.9876 (0.9887) time: 0.2091 data: 0.1271 max mem: 9377 +Train: [1] [4000/6250] eta: 0:05:49 lr: 0.000041 grad: 0.0754 (0.0797) loss: 0.9867 (0.9887) time: 0.1388 data: 0.0531 max mem: 9377 +Train: [1] [4100/6250] eta: 0:05:34 lr: 0.000041 grad: 0.0791 (0.0798) loss: 0.9859 (0.9886) time: 0.1418 data: 0.0571 max mem: 9377 +Train: [1] [4200/6250] eta: 0:05:19 lr: 0.000042 grad: 0.0814 (0.0799) loss: 0.9859 (0.9886) time: 0.2070 data: 0.1170 max mem: 9377 +Train: [1] [4300/6250] eta: 0:05:03 lr: 0.000042 grad: 0.0835 (0.0801) loss: 0.9849 (0.9885) time: 0.1640 data: 0.0730 max mem: 9377 +Train: [1] [4400/6250] eta: 0:04:48 lr: 0.000043 grad: 0.0756 (0.0802) loss: 0.9872 (0.9885) time: 0.1370 data: 0.0532 max mem: 9377 +Train: [1] [4500/6250] eta: 0:04:33 lr: 0.000043 grad: 0.0760 (0.0804) loss: 0.9888 (0.9885) time: 0.1451 data: 0.0536 max mem: 9377 +Train: [1] [4600/6250] eta: 0:04:17 lr: 0.000043 grad: 0.0856 (0.0806) loss: 0.9869 (0.9884) time: 0.1425 data: 0.0527 max mem: 9377 +Train: [1] [4700/6250] eta: 0:04:02 lr: 0.000044 grad: 0.0847 (0.0807) loss: 0.9880 (0.9884) time: 0.1479 data: 0.0650 max mem: 9377 +Train: [1] [4800/6250] eta: 0:03:46 lr: 0.000044 grad: 0.0919 (0.0809) loss: 0.9858 (0.9883) time: 0.1623 data: 0.0792 max mem: 9377 +Train: [1] [4900/6250] eta: 0:03:31 lr: 0.000045 grad: 0.0819 (0.0811) loss: 0.9874 (0.9883) time: 0.1569 data: 0.0739 max mem: 9377 +Train: [1] [5000/6250] eta: 0:03:15 lr: 0.000045 grad: 0.0881 (0.0811) loss: 0.9869 (0.9883) time: 0.1711 data: 0.0941 max mem: 9377 +Train: [1] [5100/6250] eta: 0:02:59 lr: 0.000045 grad: 0.0727 (0.0812) loss: 0.9882 (0.9882) time: 0.1455 data: 0.0614 max mem: 9377 +Train: [1] [5200/6250] eta: 0:02:44 lr: 0.000046 grad: 0.0793 (0.0813) loss: 0.9865 (0.9882) time: 0.1711 data: 0.0875 max mem: 9377 +Train: [1] [5300/6250] eta: 0:02:28 lr: 0.000046 grad: 0.0853 (0.0815) loss: 0.9871 (0.9882) time: 0.1217 data: 0.0338 max mem: 9377 +Train: [1] [5400/6250] eta: 0:02:12 lr: 0.000047 grad: 0.0805 (0.0817) loss: 0.9854 (0.9881) time: 0.1612 data: 0.0776 max mem: 9377 +Train: [1] [5500/6250] eta: 0:01:57 lr: 0.000047 grad: 0.0808 (0.0818) loss: 0.9867 (0.9881) time: 0.1786 data: 0.0846 max mem: 9377 +Train: [1] [5600/6250] eta: 0:01:41 lr: 0.000047 grad: 0.0808 (0.0819) loss: 0.9872 (0.9881) time: 0.1555 data: 0.0712 max mem: 9377 +Train: [1] [5700/6250] eta: 0:01:26 lr: 0.000048 grad: 0.0836 (0.0820) loss: 0.9862 (0.9880) time: 0.1242 data: 0.0286 max mem: 9377 +Train: [1] [5800/6250] eta: 0:01:10 lr: 0.000048 grad: 0.0888 (0.0822) loss: 0.9855 (0.9880) time: 0.1514 data: 0.0688 max mem: 9377 +Train: [1] [5900/6250] eta: 0:00:54 lr: 0.000049 grad: 0.0885 (0.0823) loss: 0.9856 (0.9880) time: 0.1477 data: 0.0588 max mem: 9377 +Train: [1] [6000/6250] eta: 0:00:39 lr: 0.000049 grad: 0.0785 (0.0825) loss: 0.9867 (0.9879) time: 0.1507 data: 0.0711 max mem: 9377 +Train: [1] [6100/6250] eta: 0:00:23 lr: 0.000049 grad: 0.0872 (0.0826) loss: 0.9859 (0.9879) time: 0.1732 data: 0.0737 max mem: 9377 +Train: [1] [6200/6250] eta: 0:00:07 lr: 0.000050 grad: 0.0814 (0.0828) loss: 0.9865 (0.9878) time: 0.1573 data: 0.0731 max mem: 9377 +Train: [1] [6249/6250] eta: 0:00:00 lr: 0.000050 grad: 0.0807 (0.0829) loss: 0.9865 (0.9878) time: 0.1319 data: 0.0557 max mem: 9377 +Train: [1] Total time: 0:16:26 (0.1578 s / it) +Averaged stats: lr: 0.000050 grad: 0.0807 (0.0829) loss: 0.9865 (0.9878) +Eval (hcp-train-subset): [1] [ 0/62] eta: 0:04:34 loss: 0.9845 (0.9845) time: 4.4313 data: 4.3998 max mem: 9377 +Eval (hcp-train-subset): [1] [61/62] eta: 0:00:00 loss: 0.9863 (0.9860) time: 0.1686 data: 0.1415 max mem: 9377 +Eval (hcp-train-subset): [1] Total time: 0:00:15 (0.2558 s / it) +Averaged stats (hcp-train-subset): loss: 0.9863 (0.9860) +Eval (hcp-val): [1] [ 0/62] eta: 0:03:34 loss: 0.9885 (0.9885) time: 3.4552 data: 3.3748 max mem: 9377 +Eval (hcp-val): [1] [61/62] eta: 0:00:00 loss: 0.9860 (0.9859) time: 0.1308 data: 0.1058 max mem: 9377 +Eval (hcp-val): [1] Total time: 0:00:13 (0.2257 s / it) +Averaged stats (hcp-val): loss: 0.9860 (0.9859) +Eval (nsd-val): [1] [ 0/62] eta: 0:05:14 loss: 0.9888 (0.9888) time: 5.0789 data: 5.0498 max mem: 9377 +Eval (nsd-val): [1] [61/62] eta: 0:00:00 loss: 0.9885 (0.9874) time: 0.1500 data: 0.1250 max mem: 9377 +Eval (nsd-val): [1] Total time: 0:00:13 (0.2198 s / it) +Averaged stats (nsd-val): loss: 0.9885 (0.9874) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +Train: [2] [ 0/6250] eta: 6:19:56 lr: 0.000050 grad: 0.0719 (0.0719) loss: 0.9927 (0.9927) time: 3.6475 data: 3.4223 max mem: 9377 +Train: [2] [ 100/6250] eta: 0:18:47 lr: 0.000050 grad: 0.0698 (0.0752) loss: 0.9864 (0.9887) time: 0.1400 data: 0.0489 max mem: 9377 +Train: [2] [ 200/6250] eta: 0:17:12 lr: 0.000051 grad: 0.0776 (0.0788) loss: 0.9872 (0.9876) time: 0.1579 data: 0.0752 max mem: 9377 +Train: [2] [ 300/6250] eta: 0:16:37 lr: 0.000051 grad: 0.0657 (0.0779) loss: 0.9868 (0.9874) time: 0.1696 data: 0.0751 max mem: 9377 +Train: [2] [ 400/6250] eta: 0:16:11 lr: 0.000052 grad: 0.0771 (0.0784) loss: 0.9855 (0.9872) time: 0.1521 data: 0.0555 max mem: 9377 +Train: [2] [ 500/6250] eta: 0:15:39 lr: 0.000052 grad: 0.0707 (0.0798) loss: 0.9884 (0.9869) time: 0.1554 data: 0.0690 max mem: 9377 +Train: [2] [ 600/6250] eta: 0:15:16 lr: 0.000052 grad: 0.0736 (0.0797) loss: 0.9872 (0.9868) time: 0.1719 data: 0.0368 max mem: 9377 +Train: [2] [ 700/6250] eta: 0:14:47 lr: 0.000053 grad: 0.0901 (0.0807) loss: 0.9853 (0.9865) time: 0.1417 data: 0.0444 max mem: 9377 +Train: [2] [ 800/6250] eta: 0:14:41 lr: 0.000053 grad: 0.0763 (0.0812) loss: 0.9859 (0.9864) time: 0.1857 data: 0.0989 max mem: 9377 +Train: [2] [ 900/6250] eta: 0:14:56 lr: 0.000054 grad: 0.0767 (0.0818) loss: 0.9867 (0.9863) time: 0.1882 data: 0.1040 max mem: 9377 +Train: [2] [1000/6250] eta: 0:14:53 lr: 0.000054 grad: 0.0908 (0.0828) loss: 0.9869 (0.9863) time: 0.2042 data: 0.1214 max mem: 9377 +Train: [2] [1100/6250] eta: 0:14:54 lr: 0.000054 grad: 0.0786 (0.0836) loss: 0.9856 (0.9861) time: 0.1750 data: 0.0799 max mem: 9377 +Train: [2] [1200/6250] eta: 0:14:33 lr: 0.000055 grad: 0.0824 (0.0842) loss: 0.9833 (0.9860) time: 0.1164 data: 0.0260 max mem: 9377 +Train: [2] [1300/6250] eta: 0:14:12 lr: 0.000055 grad: 0.0729 (0.0849) loss: 0.9866 (0.9859) time: 0.1569 data: 0.0710 max mem: 9377 +Train: [2] [1400/6250] eta: 0:14:00 lr: 0.000056 grad: 0.0823 (0.0853) loss: 0.9867 (0.9858) time: 0.1844 data: 0.0949 max mem: 9377 +Train: [2] [1500/6250] eta: 0:13:44 lr: 0.000056 grad: 0.0925 (0.0858) loss: 0.9840 (0.9857) time: 0.1814 data: 0.0980 max mem: 9377 +Train: [2] [1600/6250] eta: 0:13:31 lr: 0.000056 grad: 0.0817 (0.0862) loss: 0.9852 (0.9857) time: 0.2220 data: 0.1413 max mem: 9377 +Train: [2] [1700/6250] eta: 0:13:14 lr: 0.000057 grad: 0.0796 (0.0864) loss: 0.9849 (0.9856) time: 0.1475 data: 0.0550 max mem: 9377 +Train: [2] [1800/6250] eta: 0:12:56 lr: 0.000057 grad: 0.0861 (0.0864) loss: 0.9845 (0.9856) time: 0.2237 data: 0.1275 max mem: 9377 +Train: [2] [1900/6250] eta: 0:12:37 lr: 0.000058 grad: 0.0786 (0.0867) loss: 0.9842 (0.9855) time: 0.2105 data: 0.1173 max mem: 9377 +Train: [2] [2000/6250] eta: 0:12:19 lr: 0.000058 grad: 0.0836 (0.0870) loss: 0.9841 (0.9854) time: 0.1856 data: 0.0968 max mem: 9377 +Train: [2] [2100/6250] eta: 0:11:58 lr: 0.000058 grad: 0.0792 (0.0872) loss: 0.9848 (0.9854) time: 0.1438 data: 0.0611 max mem: 9377 +Train: [2] [2200/6250] eta: 0:11:38 lr: 0.000059 grad: 0.0762 (0.0874) loss: 0.9855 (0.9853) time: 0.1647 data: 0.0754 max mem: 9377 +Train: [2] [2300/6250] eta: 0:11:19 lr: 0.000059 grad: 0.0846 (0.0874) loss: 0.9856 (0.9853) time: 0.1911 data: 0.1111 max mem: 9377 +Train: [2] [2400/6250] eta: 0:11:01 lr: 0.000060 grad: 0.0716 (0.0874) loss: 0.9870 (0.9853) time: 0.1681 data: 0.0792 max mem: 9377 +Train: [2] [2500/6250] eta: 0:10:41 lr: 0.000060 grad: 0.0764 (0.0875) loss: 0.9857 (0.9852) time: 0.1352 data: 0.0428 max mem: 9377 +Train: [2] [2600/6250] eta: 0:10:23 lr: 0.000060 grad: 0.0798 (0.0875) loss: 0.9876 (0.9852) time: 0.1655 data: 0.0785 max mem: 9377 +Train: [2] [2700/6250] eta: 0:10:04 lr: 0.000061 grad: 0.0837 (0.0877) loss: 0.9836 (0.9851) time: 0.1433 data: 0.0554 max mem: 9377 +Train: [2] [2800/6250] eta: 0:09:46 lr: 0.000061 grad: 0.0873 (0.0879) loss: 0.9843 (0.9851) time: 0.1697 data: 0.0907 max mem: 9377 +Train: [2] [2900/6250] eta: 0:09:27 lr: 0.000062 grad: 0.0784 (0.0880) loss: 0.9828 (0.9850) time: 0.1587 data: 0.0734 max mem: 9377 +Train: [2] [3000/6250] eta: 0:09:09 lr: 0.000062 grad: 0.0771 (0.0882) loss: 0.9872 (0.9850) time: 0.1669 data: 0.0887 max mem: 9377 +Train: [2] [3100/6250] eta: 0:08:51 lr: 0.000062 grad: 0.0833 (0.0882) loss: 0.9845 (0.9850) time: 0.1675 data: 0.0845 max mem: 9377 +Train: [2] [3200/6250] eta: 0:08:33 lr: 0.000063 grad: 0.0810 (0.0884) loss: 0.9836 (0.9850) time: 0.1573 data: 0.0602 max mem: 9377 +Train: [2] [3300/6250] eta: 0:08:16 lr: 0.000063 grad: 0.0817 (0.0884) loss: 0.9854 (0.9850) time: 0.1752 data: 0.0836 max mem: 9377 +Train: [2] [3400/6250] eta: 0:07:58 lr: 0.000064 grad: 0.0739 (0.0884) loss: 0.9858 (0.9849) time: 0.1471 data: 0.0651 max mem: 9377 +Train: [2] [3500/6250] eta: 0:07:39 lr: 0.000064 grad: 0.0792 (0.0884) loss: 0.9861 (0.9849) time: 0.1543 data: 0.0610 max mem: 9377 +Train: [2] [3600/6250] eta: 0:07:22 lr: 0.000064 grad: 0.0839 (0.0884) loss: 0.9848 (0.9849) time: 0.1612 data: 0.0643 max mem: 9377 +Train: [2] [3700/6250] eta: 0:07:04 lr: 0.000065 grad: 0.0803 (0.0883) loss: 0.9850 (0.9849) time: 0.1466 data: 0.0683 max mem: 9377 +Train: [2] [3800/6250] eta: 0:06:47 lr: 0.000065 grad: 0.0828 (0.0884) loss: 0.9856 (0.9849) time: 0.1710 data: 0.0813 max mem: 9377 +Train: [2] [3900/6250] eta: 0:06:30 lr: 0.000066 grad: 0.0803 (0.0883) loss: 0.9837 (0.9849) time: 0.1337 data: 0.0464 max mem: 9377 +Train: [2] [4000/6250] eta: 0:06:13 lr: 0.000066 grad: 0.0877 (0.0884) loss: 0.9836 (0.9849) time: 0.1458 data: 0.0586 max mem: 9377 +Train: [2] [4100/6250] eta: 0:05:56 lr: 0.000066 grad: 0.0790 (0.0885) loss: 0.9851 (0.9849) time: 0.1424 data: 0.0545 max mem: 9377 +Train: [2] [4200/6250] eta: 0:05:39 lr: 0.000067 grad: 0.0857 (0.0887) loss: 0.9825 (0.9849) time: 0.1460 data: 0.0635 max mem: 9377 +Train: [2] [4300/6250] eta: 0:05:22 lr: 0.000067 grad: 0.0901 (0.0889) loss: 0.9849 (0.9849) time: 0.1501 data: 0.0576 max mem: 9377 +Train: [2] [4400/6250] eta: 0:05:05 lr: 0.000068 grad: 0.1057 (0.0892) loss: 0.9830 (0.9848) time: 0.1512 data: 0.0617 max mem: 9377 +Train: [2] [4500/6250] eta: 0:04:48 lr: 0.000068 grad: 0.0838 (0.0894) loss: 0.9838 (0.9848) time: 0.1572 data: 0.0672 max mem: 9377 +Train: [2] [4600/6250] eta: 0:04:31 lr: 0.000068 grad: 0.0877 (0.0897) loss: 0.9865 (0.9848) time: 0.1454 data: 0.0586 max mem: 9377 +Train: [2] [4700/6250] eta: 0:04:14 lr: 0.000069 grad: 0.0893 (0.0899) loss: 0.9851 (0.9848) time: 0.1342 data: 0.0549 max mem: 9377 +Train: [2] [4800/6250] eta: 0:03:57 lr: 0.000069 grad: 0.0935 (0.0901) loss: 0.9838 (0.9847) time: 0.1740 data: 0.0744 max mem: 9377 +Train: [2] [4900/6250] eta: 0:03:41 lr: 0.000070 grad: 0.0983 (0.0904) loss: 0.9841 (0.9847) time: 0.1599 data: 0.0722 max mem: 9377 +Train: [2] [5000/6250] eta: 0:03:24 lr: 0.000070 grad: 0.1063 (0.0907) loss: 0.9849 (0.9847) time: 0.1498 data: 0.0537 max mem: 9377 +Train: [2] [5100/6250] eta: 0:03:08 lr: 0.000070 grad: 0.0813 (0.0909) loss: 0.9848 (0.9847) time: 0.1415 data: 0.0540 max mem: 9377 +Train: [2] [5200/6250] eta: 0:02:51 lr: 0.000071 grad: 0.1062 (0.0913) loss: 0.9841 (0.9847) time: 0.1470 data: 0.0453 max mem: 9377 +Train: [2] [5300/6250] eta: 0:02:34 lr: 0.000071 grad: 0.1166 (0.0917) loss: 0.9799 (0.9846) time: 0.1447 data: 0.0594 max mem: 9377 +Train: [2] [5400/6250] eta: 0:02:18 lr: 0.000072 grad: 0.0994 (0.0922) loss: 0.9850 (0.9846) time: 0.1519 data: 0.0532 max mem: 9377 +Train: [2] [5500/6250] eta: 0:02:02 lr: 0.000072 grad: 0.1312 (0.0928) loss: 0.9845 (0.9846) time: 0.1809 data: 0.0901 max mem: 9377 +Train: [2] [5600/6250] eta: 0:01:45 lr: 0.000072 grad: 0.1053 (0.0934) loss: 0.9840 (0.9845) time: 0.1319 data: 0.0390 max mem: 9377 +Train: [2] [5700/6250] eta: 0:01:29 lr: 0.000073 grad: 0.1249 (0.0945) loss: 0.9851 (0.9845) time: 0.1725 data: 0.0886 max mem: 9377 +Train: [2] [5800/6250] eta: 0:01:13 lr: 0.000073 grad: 0.1131 (0.0955) loss: 0.9827 (0.9845) time: 0.1639 data: 0.0777 max mem: 9377 +Train: [2] [5900/6250] eta: 0:00:56 lr: 0.000074 grad: 0.1041 (0.0966) loss: 0.9810 (0.9845) time: 0.1747 data: 0.0917 max mem: 9377 +Train: [2] [6000/6250] eta: 0:00:40 lr: 0.000074 grad: 0.1403 (0.0981) loss: 0.9814 (0.9844) time: 0.1797 data: 0.0876 max mem: 9377 +Train: [2] [6100/6250] eta: 0:00:24 lr: 0.000074 grad: 0.1407 (0.0997) loss: 0.9828 (0.9844) time: 0.1728 data: 0.0871 max mem: 9377 +Train: [2] [6200/6250] eta: 0:00:08 lr: 0.000075 grad: 0.1218 (0.1012) loss: 0.9811 (0.9843) time: 0.1464 data: 0.0683 max mem: 9377 +Train: [2] [6249/6250] eta: 0:00:00 lr: 0.000075 grad: 0.1428 (0.1022) loss: 0.9836 (0.9843) time: 0.1578 data: 0.0737 max mem: 9377 +Train: [2] Total time: 0:17:05 (0.1641 s / it) +Averaged stats: lr: 0.000075 grad: 0.1428 (0.1022) loss: 0.9836 (0.9843) +Eval (hcp-train-subset): [2] [ 0/62] eta: 0:03:11 loss: 0.9793 (0.9793) time: 3.0938 data: 3.0102 max mem: 9377 +Eval (hcp-train-subset): [2] [61/62] eta: 0:00:00 loss: 0.9812 (0.9821) time: 0.1330 data: 0.1066 max mem: 9377 +Eval (hcp-train-subset): [2] Total time: 0:00:13 (0.2226 s / it) +Averaged stats (hcp-train-subset): loss: 0.9812 (0.9821) +Eval (hcp-val): [2] [ 0/62] eta: 0:03:36 loss: 0.9773 (0.9773) time: 3.4842 data: 3.4180 max mem: 9377 +Eval (hcp-val): [2] [61/62] eta: 0:00:00 loss: 0.9823 (0.9815) time: 0.1503 data: 0.1250 max mem: 9377 +Eval (hcp-val): [2] Total time: 0:00:14 (0.2404 s / it) +Averaged stats (hcp-val): loss: 0.9823 (0.9815) +Eval (nsd-val): [2] [ 0/62] eta: 0:05:51 loss: 0.9789 (0.9789) time: 5.6694 data: 5.6389 max mem: 9377 +Eval (nsd-val): [2] [61/62] eta: 0:00:00 loss: 0.9836 (0.9833) time: 0.1342 data: 0.1082 max mem: 9377 +Eval (nsd-val): [2] Total time: 0:00:14 (0.2364 s / it) +Averaged stats (nsd-val): loss: 0.9836 (0.9833) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +Train: [3] [ 0/6250] eta: 10:29:12 lr: 0.000075 grad: 0.2291 (0.2291) loss: 0.9831 (0.9831) time: 6.0405 data: 5.9370 max mem: 9377 +Train: [3] [ 100/6250] eta: 0:22:30 lr: 0.000075 grad: 0.1089 (0.1931) loss: 0.9845 (0.9851) time: 0.1550 data: 0.0582 max mem: 9377 +Train: [3] [ 200/6250] eta: 0:19:03 lr: 0.000076 grad: 0.1026 (0.1808) loss: 0.9846 (0.9841) time: 0.1581 data: 0.0711 max mem: 9377 +Train: [3] [ 300/6250] eta: 0:18:04 lr: 0.000076 grad: 0.1146 (0.1776) loss: 0.9822 (0.9835) time: 0.1309 data: 0.0334 max mem: 9377 +Train: [3] [ 400/6250] eta: 0:17:36 lr: 0.000077 grad: 0.1397 (0.1843) loss: 0.9840 (0.9830) time: 0.2031 data: 0.1023 max mem: 9377 +Train: [3] [ 500/6250] eta: 0:16:53 lr: 0.000077 grad: 0.1335 (0.1885) loss: 0.9828 (0.9828) time: 0.1653 data: 0.0591 max mem: 9377 +Train: [3] [ 600/6250] eta: 0:16:22 lr: 0.000077 grad: 0.1204 (0.1907) loss: 0.9829 (0.9825) time: 0.1617 data: 0.0586 max mem: 9377 +Train: [3] [ 700/6250] eta: 0:15:50 lr: 0.000078 grad: 0.1418 (0.1909) loss: 0.9804 (0.9823) time: 0.1389 data: 0.0511 max mem: 9377 +Train: [3] [ 800/6250] eta: 0:15:19 lr: 0.000078 grad: 0.1560 (0.1922) loss: 0.9831 (0.9821) time: 0.1427 data: 0.0576 max mem: 9377 +Train: [3] [ 900/6250] eta: 0:14:57 lr: 0.000079 grad: 0.1866 (0.1948) loss: 0.9829 (0.9821) time: 0.1396 data: 0.0477 max mem: 9377 +Train: [3] [1000/6250] eta: 0:14:44 lr: 0.000079 grad: 0.1867 (0.1949) loss: 0.9828 (0.9820) time: 0.1703 data: 0.0862 max mem: 9377 +Train: [3] [1100/6250] eta: 0:14:28 lr: 0.000079 grad: 0.1134 (0.1938) loss: 0.9814 (0.9819) time: 0.1567 data: 0.0701 max mem: 9377 +Train: [3] [1200/6250] eta: 0:14:11 lr: 0.000080 grad: 0.1886 (0.1953) loss: 0.9834 (0.9819) time: 0.1500 data: 0.0661 max mem: 9377 +Train: [3] [1300/6250] eta: 0:13:55 lr: 0.000080 grad: 0.1789 (0.1926) loss: 0.9813 (0.9820) time: 0.1756 data: 0.0972 max mem: 9377 +Train: [3] [1400/6250] eta: 0:13:39 lr: 0.000081 grad: 0.1161 (0.1917) loss: 0.9814 (0.9819) time: 0.1571 data: 0.0736 max mem: 9377 +Train: [3] [1500/6250] eta: 0:13:25 lr: 0.000081 grad: 0.1768 (0.1920) loss: 0.9803 (0.9819) time: 0.1854 data: 0.1074 max mem: 9377 +Train: [3] [1600/6250] eta: 0:13:06 lr: 0.000081 grad: 0.1922 (0.1942) loss: 0.9842 (0.9818) time: 0.1758 data: 0.0856 max mem: 9377 +Train: [3] [1700/6250] eta: 0:12:48 lr: 0.000082 grad: 0.1084 (0.1935) loss: 0.9774 (0.9817) time: 0.1608 data: 0.0787 max mem: 9377 +Train: [3] [1800/6250] eta: 0:12:26 lr: 0.000082 grad: 0.1449 (0.1937) loss: 0.9814 (0.9816) time: 0.1541 data: 0.0669 max mem: 9377 +Train: [3] [1900/6250] eta: 0:12:07 lr: 0.000083 grad: 0.2193 (0.1949) loss: 0.9814 (0.9815) time: 0.1884 data: 0.1019 max mem: 9377 +Train: [3] [2000/6250] eta: 0:11:48 lr: 0.000083 grad: 0.1487 (0.1937) loss: 0.9817 (0.9814) time: 0.1759 data: 0.0914 max mem: 9377 +Train: [3] [2100/6250] eta: 0:11:28 lr: 0.000083 grad: 0.2904 (0.1956) loss: 0.9800 (0.9813) time: 0.1521 data: 0.0544 max mem: 9377 +Train: [3] [2200/6250] eta: 0:11:09 lr: 0.000084 grad: 0.1693 (0.1964) loss: 0.9813 (0.9812) time: 0.1503 data: 0.0675 max mem: 9377 +Train: [3] [2300/6250] eta: 0:10:51 lr: 0.000084 grad: 0.1393 (0.1965) loss: 0.9794 (0.9811) time: 0.1721 data: 0.0875 max mem: 9377 +Train: [3] [2400/6250] eta: 0:10:35 lr: 0.000085 grad: 0.1894 (0.1970) loss: 0.9780 (0.9810) time: 0.1695 data: 0.0849 max mem: 9377 +Train: [3] [2500/6250] eta: 0:10:19 lr: 0.000085 grad: 0.2031 (0.1971) loss: 0.9788 (0.9808) time: 0.1639 data: 0.0786 max mem: 9377 +Train: [3] [2600/6250] eta: 0:10:03 lr: 0.000085 grad: 0.1751 (0.1971) loss: 0.9771 (0.9806) time: 0.1919 data: 0.0957 max mem: 9377 +Train: [3] [2700/6250] eta: 0:09:47 lr: 0.000086 grad: 0.2166 (0.1978) loss: 0.9787 (0.9805) time: 0.1527 data: 0.0636 max mem: 9377 +Train: [3] [2800/6250] eta: 0:09:32 lr: 0.000086 grad: 0.1424 (0.1972) loss: 0.9797 (0.9804) time: 0.2053 data: 0.1116 max mem: 9377 +Train: [3] [2900/6250] eta: 0:09:18 lr: 0.000087 grad: 0.1493 (0.1969) loss: 0.9755 (0.9803) time: 0.1914 data: 0.1009 max mem: 9377 +Train: [3] [3000/6250] eta: 0:09:01 lr: 0.000087 grad: 0.1928 (0.1979) loss: 0.9766 (0.9802) time: 0.1481 data: 0.0589 max mem: 9377 +Train: [3] [3100/6250] eta: 0:08:44 lr: 0.000087 grad: 0.1385 (0.1970) loss: 0.9760 (0.9801) time: 0.1610 data: 0.0735 max mem: 9377 +Train: [3] [3200/6250] eta: 0:08:26 lr: 0.000088 grad: 0.1575 (0.1974) loss: 0.9778 (0.9800) time: 0.1578 data: 0.0702 max mem: 9377 +Train: [3] [3300/6250] eta: 0:08:09 lr: 0.000088 grad: 0.2073 (0.1978) loss: 0.9766 (0.9798) time: 0.1869 data: 0.0982 max mem: 9377 +Train: [3] [3400/6250] eta: 0:07:52 lr: 0.000089 grad: 0.1944 (0.1978) loss: 0.9757 (0.9797) time: 0.1472 data: 0.0558 max mem: 9377 +Train: [3] [3500/6250] eta: 0:07:36 lr: 0.000089 grad: 0.1715 (0.1980) loss: 0.9752 (0.9796) time: 0.1950 data: 0.1079 max mem: 9377 +Train: [3] [3600/6250] eta: 0:07:18 lr: 0.000089 grad: 0.1825 (0.1977) loss: 0.9766 (0.9795) time: 0.1607 data: 0.0718 max mem: 9377 +Train: [3] [3700/6250] eta: 0:07:01 lr: 0.000090 grad: 0.1900 (0.1973) loss: 0.9750 (0.9794) time: 0.1456 data: 0.0531 max mem: 9377 +Train: [3] [3800/6250] eta: 0:06:43 lr: 0.000090 grad: 0.2001 (0.1974) loss: 0.9761 (0.9793) time: 0.1429 data: 0.0509 max mem: 9377 +Train: [3] [3900/6250] eta: 0:06:27 lr: 0.000091 grad: 0.1547 (0.1974) loss: 0.9761 (0.9792) time: 0.1721 data: 0.0871 max mem: 9377 +Train: [3] [4000/6250] eta: 0:06:10 lr: 0.000091 grad: 0.1543 (0.1969) loss: 0.9757 (0.9791) time: 0.1589 data: 0.0716 max mem: 9377 +Train: [3] [4100/6250] eta: 0:05:53 lr: 0.000091 grad: 0.1763 (0.1974) loss: 0.9747 (0.9790) time: 0.1529 data: 0.0675 max mem: 9377 +Train: [3] [4200/6250] eta: 0:05:36 lr: 0.000092 grad: 0.1633 (0.1974) loss: 0.9758 (0.9789) time: 0.1418 data: 0.0569 max mem: 9377 +Train: [3] [4300/6250] eta: 0:05:20 lr: 0.000092 grad: 0.2003 (0.1972) loss: 0.9754 (0.9788) time: 0.1694 data: 0.0840 max mem: 9377 +Train: [3] [4400/6250] eta: 0:05:03 lr: 0.000093 grad: 0.1477 (0.1968) loss: 0.9750 (0.9787) time: 0.1446 data: 0.0615 max mem: 9377 +Train: [3] [4500/6250] eta: 0:04:46 lr: 0.000093 grad: 0.1448 (0.1966) loss: 0.9760 (0.9787) time: 0.1504 data: 0.0679 max mem: 9377 +Train: [3] [4600/6250] eta: 0:04:29 lr: 0.000093 grad: 0.1760 (0.1967) loss: 0.9751 (0.9786) time: 0.1787 data: 0.0960 max mem: 9377 +Train: [3] [4700/6250] eta: 0:04:12 lr: 0.000094 grad: 0.1822 (0.1968) loss: 0.9736 (0.9785) time: 0.1475 data: 0.0627 max mem: 9377 +Train: [3] [4800/6250] eta: 0:03:56 lr: 0.000094 grad: 0.1469 (0.1964) loss: 0.9738 (0.9784) time: 0.1677 data: 0.0862 max mem: 9377 +Train: [3] [4900/6250] eta: 0:03:39 lr: 0.000095 grad: 0.2037 (0.1964) loss: 0.9771 (0.9783) time: 0.1447 data: 0.0609 max mem: 9377 +Train: [3] [5000/6250] eta: 0:03:23 lr: 0.000095 grad: 0.1445 (0.1963) loss: 0.9728 (0.9782) time: 0.1570 data: 0.0666 max mem: 9377 +Train: [3] [5100/6250] eta: 0:03:06 lr: 0.000095 grad: 0.1853 (0.1967) loss: 0.9763 (0.9782) time: 0.1671 data: 0.0804 max mem: 9377 +Train: [3] [5200/6250] eta: 0:02:50 lr: 0.000096 grad: 0.1298 (0.1963) loss: 0.9717 (0.9781) time: 0.1517 data: 0.0606 max mem: 9377 +Train: [3] [5300/6250] eta: 0:02:34 lr: 0.000096 grad: 0.1362 (0.1958) loss: 0.9728 (0.9780) time: 0.1515 data: 0.0709 max mem: 9377 +Train: [3] [5400/6250] eta: 0:02:17 lr: 0.000097 grad: 0.1613 (0.1955) loss: 0.9747 (0.9779) time: 0.1782 data: 0.0780 max mem: 9377 +Train: [3] [5500/6250] eta: 0:02:01 lr: 0.000097 grad: 0.1447 (0.1951) loss: 0.9699 (0.9778) time: 0.1819 data: 0.1007 max mem: 9377 +Train: [3] [5600/6250] eta: 0:01:45 lr: 0.000097 grad: 0.1481 (0.1949) loss: 0.9713 (0.9777) time: 0.1364 data: 0.0487 max mem: 9377 +Train: [3] [5700/6250] eta: 0:01:29 lr: 0.000098 grad: 0.1794 (0.1945) loss: 0.9674 (0.9775) time: 0.1698 data: 0.0898 max mem: 9377 +Train: [3] [5800/6250] eta: 0:01:13 lr: 0.000098 grad: 0.1885 (0.1945) loss: 0.9665 (0.9773) time: 0.1520 data: 0.0736 max mem: 9377 +Train: [3] [5900/6250] eta: 0:00:56 lr: 0.000099 grad: 0.1313 (0.1949) loss: 0.9695 (0.9772) time: 0.1460 data: 0.0684 max mem: 9377 +Train: [3] [6000/6250] eta: 0:00:40 lr: 0.000099 grad: 0.1501 (0.1948) loss: 0.9680 (0.9770) time: 0.1671 data: 0.0845 max mem: 9377 +Train: [3] [6100/6250] eta: 0:00:24 lr: 0.000099 grad: 0.1494 (0.1945) loss: 0.9703 (0.9769) time: 0.1562 data: 0.0710 max mem: 9377 +Train: [3] [6200/6250] eta: 0:00:08 lr: 0.000100 grad: 0.1858 (0.1942) loss: 0.9642 (0.9767) time: 0.1652 data: 0.0751 max mem: 9377 +Train: [3] [6249/6250] eta: 0:00:00 lr: 0.000100 grad: 0.1461 (0.1941) loss: 0.9660 (0.9766) time: 0.1859 data: 0.1015 max mem: 9377 +Train: [3] Total time: 0:17:05 (0.1641 s / it) +Averaged stats: lr: 0.000100 grad: 0.1461 (0.1941) loss: 0.9660 (0.9766) +Eval (hcp-train-subset): [3] [ 0/62] eta: 0:04:57 loss: 0.9653 (0.9653) time: 4.7952 data: 4.7616 max mem: 9377 +Eval (hcp-train-subset): [3] [61/62] eta: 0:00:00 loss: 0.9665 (0.9649) time: 0.1238 data: 0.0974 max mem: 9377 +Eval (hcp-train-subset): [3] Total time: 0:00:15 (0.2501 s / it) +Averaged stats (hcp-train-subset): loss: 0.9665 (0.9649) +Eval (hcp-val): [3] [ 0/62] eta: 0:05:02 loss: 0.9611 (0.9611) time: 4.8765 data: 4.8475 max mem: 9377 +Eval (hcp-val): [3] [61/62] eta: 0:00:00 loss: 0.9648 (0.9656) time: 0.1326 data: 0.1073 max mem: 9377 +Eval (hcp-val): [3] Total time: 0:00:13 (0.2199 s / it) +Averaged stats (hcp-val): loss: 0.9648 (0.9656) +Eval (nsd-val): [3] [ 0/62] eta: 0:03:38 loss: 0.9457 (0.9457) time: 3.5243 data: 3.4679 max mem: 9377 +Eval (nsd-val): [3] [61/62] eta: 0:00:00 loss: 0.9561 (0.9546) time: 0.1497 data: 0.1238 max mem: 9377 +Eval (nsd-val): [3] Total time: 0:00:13 (0.2228 s / it) +Averaged stats (nsd-val): loss: 0.9561 (0.9546) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +Train: [4] [ 0/6250] eta: 8:04:09 lr: 0.000100 grad: 0.3391 (0.3391) loss: 0.9720 (0.9720) time: 4.6480 data: 4.3297 max mem: 9377 +Train: [4] [ 100/6250] eta: 0:23:37 lr: 0.000100 grad: 0.1738 (0.1852) loss: 0.9683 (0.9705) time: 0.1740 data: 0.0736 max mem: 9377 +Train: [4] [ 200/6250] eta: 0:20:20 lr: 0.000101 grad: 0.1700 (0.2008) loss: 0.9639 (0.9673) time: 0.1554 data: 0.0512 max mem: 9377 +Train: [4] [ 300/6250] eta: 0:18:52 lr: 0.000101 grad: 0.1620 (0.2024) loss: 0.9652 (0.9664) time: 0.1582 data: 0.0705 max mem: 9377 +Train: [4] [ 400/6250] eta: 0:17:43 lr: 0.000102 grad: 0.2017 (0.2081) loss: 0.9617 (0.9651) time: 0.1383 data: 0.0448 max mem: 9377 +Train: [4] [ 500/6250] eta: 0:16:57 lr: 0.000102 grad: 0.1540 (0.2041) loss: 0.9633 (0.9642) time: 0.1512 data: 0.0552 max mem: 9377 +Train: [4] [ 600/6250] eta: 0:16:29 lr: 0.000102 grad: 0.1731 (0.2013) loss: 0.9634 (0.9635) time: 0.1667 data: 0.0826 max mem: 9377 +Train: [4] [ 700/6250] eta: 0:16:01 lr: 0.000103 grad: 0.1872 (0.2067) loss: 0.9648 (0.9632) time: 0.1560 data: 0.0647 max mem: 9377 +Train: [4] [ 800/6250] eta: 0:15:31 lr: 0.000103 grad: 0.1619 (0.2108) loss: 0.9646 (0.9629) time: 0.1560 data: 0.0663 max mem: 9377 +Train: [4] [ 900/6250] eta: 0:15:10 lr: 0.000104 grad: 0.1612 (0.2124) loss: 0.9605 (0.9628) time: 0.1414 data: 0.0513 max mem: 9377 +Train: [4] [1000/6250] eta: 0:14:50 lr: 0.000104 grad: 0.1621 (0.2141) loss: 0.9586 (0.9626) time: 0.1606 data: 0.0762 max mem: 9377 +Train: [4] [1100/6250] eta: 0:14:37 lr: 0.000104 grad: 0.2233 (0.2151) loss: 0.9611 (0.9624) time: 0.1547 data: 0.0762 max mem: 9377 +Train: [4] [1200/6250] eta: 0:14:15 lr: 0.000105 grad: 0.1583 (0.2144) loss: 0.9591 (0.9622) time: 0.1507 data: 0.0672 max mem: 9377 +Train: [4] [1300/6250] eta: 0:13:50 lr: 0.000105 grad: 0.1418 (0.2137) loss: 0.9572 (0.9620) time: 0.1308 data: 0.0478 max mem: 9377 +Train: [4] [1400/6250] eta: 0:13:27 lr: 0.000106 grad: 0.1610 (0.2131) loss: 0.9571 (0.9618) time: 0.1524 data: 0.0700 max mem: 9377 +Train: [4] [1500/6250] eta: 0:13:08 lr: 0.000106 grad: 0.1723 (0.2140) loss: 0.9590 (0.9616) time: 0.1578 data: 0.0742 max mem: 9377 +Train: [4] [1600/6250] eta: 0:12:56 lr: 0.000106 grad: 0.1927 (0.2136) loss: 0.9577 (0.9614) time: 0.1449 data: 0.0619 max mem: 9377 +Train: [4] [1700/6250] eta: 0:12:42 lr: 0.000107 grad: 0.2802 (0.2163) loss: 0.9610 (0.9613) time: 0.1668 data: 0.0824 max mem: 9377 +Train: [4] [1800/6250] eta: 0:12:24 lr: 0.000107 grad: 0.1754 (0.2160) loss: 0.9567 (0.9611) time: 0.1494 data: 0.0729 max mem: 9377 +Train: [4] [1900/6250] eta: 0:12:06 lr: 0.000108 grad: 0.2110 (0.2170) loss: 0.9580 (0.9609) time: 0.1606 data: 0.0809 max mem: 9377 +Train: [4] [2000/6250] eta: 0:11:47 lr: 0.000108 grad: 0.1569 (0.2169) loss: 0.9565 (0.9607) time: 0.1860 data: 0.1037 max mem: 9377 +Train: [4] [2100/6250] eta: 0:11:30 lr: 0.000108 grad: 0.1556 (0.2179) loss: 0.9535 (0.9605) time: 0.1618 data: 0.0821 max mem: 9377 +Train: [4] [2200/6250] eta: 0:11:13 lr: 0.000109 grad: 0.1837 (0.2178) loss: 0.9536 (0.9602) time: 0.1486 data: 0.0652 max mem: 9377 +Train: [4] [2300/6250] eta: 0:10:57 lr: 0.000109 grad: 0.2074 (0.2195) loss: 0.9537 (0.9599) time: 0.1572 data: 0.0750 max mem: 9377 +Train: [4] [2400/6250] eta: 0:10:39 lr: 0.000110 grad: 0.2822 (0.2206) loss: 0.9553 (0.9596) time: 0.1635 data: 0.0783 max mem: 9377 +Train: [4] [2500/6250] eta: 0:10:19 lr: 0.000110 grad: 0.2557 (0.2216) loss: 0.9546 (0.9593) time: 0.1373 data: 0.0550 max mem: 9377 +Train: [4] [2600/6250] eta: 0:10:02 lr: 0.000110 grad: 0.2324 (0.2233) loss: 0.9436 (0.9589) time: 0.1542 data: 0.0640 max mem: 9377 +Train: [4] [2700/6250] eta: 0:09:44 lr: 0.000111 grad: 0.2443 (0.2249) loss: 0.9506 (0.9585) time: 0.1484 data: 0.0638 max mem: 9377 +Train: [4] [2800/6250] eta: 0:09:25 lr: 0.000111 grad: 0.2121 (0.2272) loss: 0.9457 (0.9581) time: 0.1520 data: 0.0595 max mem: 9377 +Train: [4] [2900/6250] eta: 0:09:07 lr: 0.000112 grad: 0.2719 (0.2294) loss: 0.9448 (0.9578) time: 0.1473 data: 0.0588 max mem: 9377 +Train: [4] [3000/6250] eta: 0:08:50 lr: 0.000112 grad: 0.2613 (0.2326) loss: 0.9455 (0.9574) time: 0.1373 data: 0.0473 max mem: 9377 +Train: [4] [3100/6250] eta: 0:08:32 lr: 0.000112 grad: 0.2893 (0.2342) loss: 0.9448 (0.9570) time: 0.1415 data: 0.0504 max mem: 9377 +Train: [4] [3200/6250] eta: 0:08:16 lr: 0.000113 grad: 0.2787 (0.2358) loss: 0.9416 (0.9566) time: 0.1782 data: 0.0875 max mem: 9377 +Train: [4] [3300/6250] eta: 0:08:00 lr: 0.000113 grad: 0.2572 (0.2364) loss: 0.9452 (0.9561) time: 0.1616 data: 0.0631 max mem: 9377 +Train: [4] [3400/6250] eta: 0:07:43 lr: 0.000114 grad: 0.2098 (0.2375) loss: 0.9417 (0.9557) time: 0.1458 data: 0.0523 max mem: 9377 +Train: [4] [3500/6250] eta: 0:07:26 lr: 0.000114 grad: 0.2107 (0.2386) loss: 0.9417 (0.9553) time: 0.1264 data: 0.0423 max mem: 9377 +Train: [4] [3600/6250] eta: 0:07:09 lr: 0.000114 grad: 0.2591 (0.2397) loss: 0.9408 (0.9549) time: 0.1659 data: 0.0847 max mem: 9377 +Train: [4] [3700/6250] eta: 0:06:53 lr: 0.000115 grad: 0.2065 (0.2395) loss: 0.9398 (0.9545) time: 0.1792 data: 0.0971 max mem: 9377 +Train: [4] [3800/6250] eta: 0:06:37 lr: 0.000115 grad: 0.2354 (0.2399) loss: 0.9376 (0.9540) time: 0.1299 data: 0.0266 max mem: 9377 +Train: [4] [3900/6250] eta: 0:06:20 lr: 0.000116 grad: 0.2285 (0.2397) loss: 0.9363 (0.9536) time: 0.1383 data: 0.0526 max mem: 9377 +Train: [4] [4000/6250] eta: 0:06:03 lr: 0.000116 grad: 0.2261 (0.2399) loss: 0.9360 (0.9532) time: 0.1547 data: 0.0704 max mem: 9377 +Train: [4] [4100/6250] eta: 0:05:46 lr: 0.000116 grad: 0.2073 (0.2409) loss: 0.9352 (0.9528) time: 0.1626 data: 0.0836 max mem: 9377 +Train: [4] [4200/6250] eta: 0:05:30 lr: 0.000117 grad: 0.2404 (0.2412) loss: 0.9331 (0.9524) time: 0.1592 data: 0.0697 max mem: 9377 +Train: [4] [4300/6250] eta: 0:05:14 lr: 0.000117 grad: 0.1767 (0.2408) loss: 0.9317 (0.9519) time: 0.1555 data: 0.0727 max mem: 9377 +Train: [4] [4400/6250] eta: 0:04:57 lr: 0.000118 grad: 0.2096 (0.2407) loss: 0.9339 (0.9515) time: 0.1418 data: 0.0671 max mem: 9377 +Train: [4] [4500/6250] eta: 0:04:41 lr: 0.000118 grad: 0.2506 (0.2407) loss: 0.9325 (0.9511) time: 0.1472 data: 0.0653 max mem: 9377 +Train: [4] [4600/6250] eta: 0:04:24 lr: 0.000118 grad: 0.2262 (0.2407) loss: 0.9322 (0.9507) time: 0.1281 data: 0.0328 max mem: 9377 +Train: [4] [4700/6250] eta: 0:04:08 lr: 0.000119 grad: 0.2215 (0.2403) loss: 0.9305 (0.9503) time: 0.1742 data: 0.0904 max mem: 9377 +Train: [4] [4800/6250] eta: 0:03:52 lr: 0.000119 grad: 0.1886 (0.2402) loss: 0.9345 (0.9499) time: 0.1476 data: 0.0619 max mem: 9377 +Train: [4] [4900/6250] eta: 0:03:35 lr: 0.000120 grad: 0.2202 (0.2395) loss: 0.9294 (0.9495) time: 0.1404 data: 0.0570 max mem: 9377 +Train: [4] [5000/6250] eta: 0:03:19 lr: 0.000120 grad: 0.2131 (0.2392) loss: 0.9292 (0.9492) time: 0.1485 data: 0.0603 max mem: 9377 +Train: [4] [5100/6250] eta: 0:03:03 lr: 0.000120 grad: 0.1758 (0.2388) loss: 0.9323 (0.9488) time: 0.1332 data: 0.0479 max mem: 9377 +Train: [4] [5200/6250] eta: 0:02:47 lr: 0.000121 grad: 0.2020 (0.2388) loss: 0.9264 (0.9484) time: 0.1447 data: 0.0611 max mem: 9377 +Train: [4] [5300/6250] eta: 0:02:31 lr: 0.000121 grad: 0.1910 (0.2389) loss: 0.9263 (0.9480) time: 0.1642 data: 0.0804 max mem: 9377 +Train: [4] [5400/6250] eta: 0:02:15 lr: 0.000122 grad: 0.3031 (0.2392) loss: 0.9266 (0.9476) time: 0.2095 data: 0.1208 max mem: 9377 +Train: [4] [5500/6250] eta: 0:01:59 lr: 0.000122 grad: 0.1628 (0.2386) loss: 0.9267 (0.9473) time: 0.1753 data: 0.0884 max mem: 9377 +Train: [4] [5600/6250] eta: 0:01:44 lr: 0.000122 grad: 0.2832 (0.2386) loss: 0.9250 (0.9469) time: 0.1644 data: 0.0857 max mem: 9377 +Train: [4] [5700/6250] eta: 0:01:28 lr: 0.000123 grad: 0.2095 (0.2385) loss: 0.9242 (0.9465) time: 0.1313 data: 0.0549 max mem: 9377 +Train: [4] [5800/6250] eta: 0:01:12 lr: 0.000123 grad: 0.2027 (0.2382) loss: 0.9223 (0.9461) time: 0.1665 data: 0.0874 max mem: 9377 +Train: [4] [5900/6250] eta: 0:00:56 lr: 0.000124 grad: 0.2007 (0.2377) loss: 0.9230 (0.9457) time: 0.1525 data: 0.0746 max mem: 9377 +Train: [4] [6000/6250] eta: 0:00:40 lr: 0.000124 grad: 0.2058 (0.2374) loss: 0.9278 (0.9454) time: 0.1579 data: 0.0820 max mem: 9377 +Train: [4] [6100/6250] eta: 0:00:24 lr: 0.000124 grad: 0.1752 (0.2368) loss: 0.9204 (0.9450) time: 0.1648 data: 0.0781 max mem: 9377 +Train: [4] [6200/6250] eta: 0:00:08 lr: 0.000125 grad: 0.1972 (0.2365) loss: 0.9214 (0.9446) time: 0.1506 data: 0.0543 max mem: 9377 +Train: [4] [6249/6250] eta: 0:00:00 lr: 0.000125 grad: 0.2145 (0.2364) loss: 0.9189 (0.9444) time: 0.1701 data: 0.0789 max mem: 9377 +Train: [4] Total time: 0:16:50 (0.1617 s / it) +Averaged stats: lr: 0.000125 grad: 0.2145 (0.2364) loss: 0.9189 (0.9444) +Eval (hcp-train-subset): [4] [ 0/62] eta: 0:05:39 loss: 0.9168 (0.9168) time: 5.4764 data: 5.4444 max mem: 9377 +Eval (hcp-train-subset): [4] [61/62] eta: 0:00:00 loss: 0.9199 (0.9198) time: 0.1498 data: 0.1242 max mem: 9377 +Eval (hcp-train-subset): [4] Total time: 0:00:15 (0.2508 s / it) +Averaged stats (hcp-train-subset): loss: 0.9199 (0.9198) +Making plots (hcp-train-subset): example=11 +Eval (hcp-val): [4] [ 0/62] eta: 0:05:53 loss: 0.9179 (0.9179) time: 5.6970 data: 5.6649 max mem: 9377 +Eval (hcp-val): [4] [61/62] eta: 0:00:00 loss: 0.9176 (0.9184) time: 0.1457 data: 0.1199 max mem: 9377 +Eval (hcp-val): [4] Total time: 0:00:14 (0.2413 s / it) +Averaged stats (hcp-val): loss: 0.9176 (0.9184) +Making plots (hcp-val): example=36 +Eval (nsd-val): [4] [ 0/62] eta: 0:06:07 loss: 0.8800 (0.8800) time: 5.9274 data: 5.8921 max mem: 9377 +Eval (nsd-val): [4] [61/62] eta: 0:00:00 loss: 0.8874 (0.8879) time: 0.1637 data: 0.1381 max mem: 9377 +Eval (nsd-val): [4] Total time: 0:00:15 (0.2442 s / it) +Averaged stats (nsd-val): loss: 0.8874 (0.8879) +Making plots (nsd-val): example=5 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-00004.pth +Train: [5] [ 0/6250] eta: 12:07:40 lr: 0.000125 grad: 0.1908 (0.1908) loss: 0.9206 (0.9206) time: 6.9857 data: 6.8499 max mem: 9377 +Train: [5] [ 100/6250] eta: 0:23:54 lr: 0.000125 grad: 0.2087 (0.2490) loss: 0.9288 (0.9298) time: 0.1984 data: 0.0984 max mem: 9377 +Train: [5] [ 200/6250] eta: 0:19:47 lr: 0.000125 grad: 0.1524 (0.2264) loss: 0.9290 (0.9292) time: 0.1549 data: 0.0724 max mem: 9377 +Train: [5] [ 300/6250] eta: 0:18:06 lr: 0.000125 grad: 0.1662 (0.2210) loss: 0.9219 (0.9271) time: 0.1518 data: 0.0622 max mem: 9377 +Train: [5] [ 400/6250] eta: 0:16:58 lr: 0.000125 grad: 0.1954 (0.2168) loss: 0.9253 (0.9260) time: 0.1485 data: 0.0629 max mem: 9377 +Train: [5] [ 500/6250] eta: 0:16:02 lr: 0.000125 grad: 0.1537 (0.2140) loss: 0.9223 (0.9254) time: 0.1452 data: 0.0579 max mem: 9377 +Train: [5] [ 600/6250] eta: 0:15:18 lr: 0.000125 grad: 0.1371 (0.2123) loss: 0.9229 (0.9251) time: 0.1402 data: 0.0553 max mem: 9377 +Train: [5] [ 700/6250] eta: 0:15:01 lr: 0.000125 grad: 0.1958 (0.2086) loss: 0.9229 (0.9247) time: 0.1711 data: 0.0764 max mem: 9377 +Train: [5] [ 800/6250] eta: 0:14:49 lr: 0.000125 grad: 0.2111 (0.2079) loss: 0.9202 (0.9242) time: 0.1504 data: 0.0641 max mem: 9377 +Train: [5] [ 900/6250] eta: 0:14:37 lr: 0.000125 grad: 0.1704 (0.2065) loss: 0.9196 (0.9237) time: 0.1990 data: 0.1071 max mem: 9377 +Train: [5] [1000/6250] eta: 0:14:13 lr: 0.000125 grad: 0.1660 (0.2054) loss: 0.9207 (0.9233) time: 0.1541 data: 0.0739 max mem: 9377 +Train: [5] [1100/6250] eta: 0:13:53 lr: 0.000125 grad: 0.1908 (0.2039) loss: 0.9190 (0.9229) time: 0.1435 data: 0.0528 max mem: 9377 +Train: [5] [1200/6250] eta: 0:13:38 lr: 0.000125 grad: 0.1857 (0.2017) loss: 0.9164 (0.9226) time: 0.1685 data: 0.0879 max mem: 9377 +Train: [5] [1300/6250] eta: 0:13:22 lr: 0.000125 grad: 0.1865 (0.2001) loss: 0.9166 (0.9222) time: 0.1540 data: 0.0720 max mem: 9377 +Train: [5] [1400/6250] eta: 0:13:02 lr: 0.000125 grad: 0.2194 (0.2009) loss: 0.9136 (0.9219) time: 0.1387 data: 0.0528 max mem: 9377 +Train: [5] [1500/6250] eta: 0:12:44 lr: 0.000125 grad: 0.1756 (0.1998) loss: 0.9149 (0.9214) time: 0.1282 data: 0.0443 max mem: 9377 +Train: [5] [1600/6250] eta: 0:12:22 lr: 0.000125 grad: 0.2085 (0.1984) loss: 0.9184 (0.9211) time: 0.1330 data: 0.0439 max mem: 9377 +Train: [5] [1700/6250] eta: 0:12:06 lr: 0.000125 grad: 0.1334 (0.1980) loss: 0.9146 (0.9209) time: 0.1610 data: 0.0680 max mem: 9377 +Train: [5] [1800/6250] eta: 0:11:50 lr: 0.000125 grad: 0.1602 (0.1972) loss: 0.9183 (0.9206) time: 0.1632 data: 0.0778 max mem: 9377 +Train: [5] [1900/6250] eta: 0:11:38 lr: 0.000125 grad: 0.1641 (0.1957) loss: 0.9139 (0.9203) time: 0.1910 data: 0.1128 max mem: 9377 +Train: [5] [2000/6250] eta: 0:11:22 lr: 0.000125 grad: 0.1378 (0.1951) loss: 0.9157 (0.9200) time: 0.1534 data: 0.0686 max mem: 9377 +Train: [5] [2100/6250] eta: 0:11:07 lr: 0.000125 grad: 0.1503 (0.1938) loss: 0.9147 (0.9197) time: 0.1701 data: 0.0893 max mem: 9377 +Train: [5] [2200/6250] eta: 0:10:52 lr: 0.000125 grad: 0.1475 (0.1925) loss: 0.9123 (0.9195) time: 0.1689 data: 0.0927 max mem: 9377 +Train: [5] [2300/6250] eta: 0:10:36 lr: 0.000125 grad: 0.1732 (0.1916) loss: 0.9148 (0.9192) time: 0.1859 data: 0.1122 max mem: 9377 +Train: [5] [2400/6250] eta: 0:10:21 lr: 0.000125 grad: 0.1847 (0.1915) loss: 0.9141 (0.9190) time: 0.1728 data: 0.0955 max mem: 9377 +Train: [5] [2500/6250] eta: 0:10:04 lr: 0.000125 grad: 0.1697 (0.1910) loss: 0.9100 (0.9187) time: 0.2075 data: 0.0682 max mem: 9377 +Train: [5] [2600/6250] eta: 0:09:45 lr: 0.000125 grad: 0.1485 (0.1900) loss: 0.9088 (0.9184) time: 0.1453 data: 0.0679 max mem: 9377 +Train: [5] [2700/6250] eta: 0:09:28 lr: 0.000125 grad: 0.1701 (0.1891) loss: 0.9128 (0.9182) time: 0.1596 data: 0.0659 max mem: 9377 +Train: [5] [2800/6250] eta: 0:09:11 lr: 0.000125 grad: 0.1789 (0.1883) loss: 0.9096 (0.9179) time: 0.1479 data: 0.0634 max mem: 9377 +Train: [5] [2900/6250] eta: 0:08:55 lr: 0.000125 grad: 0.2060 (0.1876) loss: 0.9114 (0.9176) time: 0.1645 data: 0.0781 max mem: 9377 +Train: [5] [3000/6250] eta: 0:08:38 lr: 0.000125 grad: 0.1634 (0.1870) loss: 0.9081 (0.9173) time: 0.1457 data: 0.0592 max mem: 9377 +Train: [5] [3100/6250] eta: 0:08:21 lr: 0.000125 grad: 0.1568 (0.1864) loss: 0.9068 (0.9169) time: 0.1582 data: 0.0698 max mem: 9377 +Train: [5] [3200/6250] eta: 0:08:05 lr: 0.000125 grad: 0.1426 (0.1858) loss: 0.9038 (0.9166) time: 0.1373 data: 0.0555 max mem: 9377 +Train: [5] [3300/6250] eta: 0:07:48 lr: 0.000125 grad: 0.1837 (0.1852) loss: 0.9024 (0.9162) time: 0.1514 data: 0.0600 max mem: 9377 +Train: [5] [3400/6250] eta: 0:07:31 lr: 0.000125 grad: 0.1460 (0.1845) loss: 0.9091 (0.9159) time: 0.1667 data: 0.0837 max mem: 9377 +Train: [5] [3500/6250] eta: 0:07:15 lr: 0.000125 grad: 0.1586 (0.1840) loss: 0.9004 (0.9155) time: 0.1322 data: 0.0459 max mem: 9377 +Train: [5] [3600/6250] eta: 0:06:58 lr: 0.000125 grad: 0.1413 (0.1833) loss: 0.9037 (0.9151) time: 0.1705 data: 0.0786 max mem: 9377 +Train: [5] [3700/6250] eta: 0:06:43 lr: 0.000125 grad: 0.1510 (0.1829) loss: 0.9031 (0.9148) time: 0.1597 data: 0.0682 max mem: 9377 +Train: [5] [3800/6250] eta: 0:06:26 lr: 0.000125 grad: 0.1393 (0.1822) loss: 0.9030 (0.9144) time: 0.1698 data: 0.0765 max mem: 9377 +Train: [5] [3900/6250] eta: 0:06:11 lr: 0.000125 grad: 0.1504 (0.1818) loss: 0.8961 (0.9141) time: 0.1731 data: 0.0947 max mem: 9377 +Train: [5] [4000/6250] eta: 0:05:54 lr: 0.000125 grad: 0.1500 (0.1814) loss: 0.8972 (0.9136) time: 0.1405 data: 0.0540 max mem: 9377 +Train: [5] [4100/6250] eta: 0:05:39 lr: 0.000125 grad: 0.1786 (0.1816) loss: 0.8958 (0.9132) time: 0.1701 data: 0.0832 max mem: 9377 +Train: [5] [4200/6250] eta: 0:05:23 lr: 0.000125 grad: 0.1880 (0.1813) loss: 0.8965 (0.9128) time: 0.1540 data: 0.0706 max mem: 9377 +Train: [5] [4300/6250] eta: 0:05:07 lr: 0.000125 grad: 0.1370 (0.1810) loss: 0.8949 (0.9124) time: 0.1591 data: 0.0698 max mem: 9377 +Train: [5] [4400/6250] eta: 0:04:52 lr: 0.000125 grad: 0.1915 (0.1812) loss: 0.8909 (0.9120) time: 0.1813 data: 0.0914 max mem: 9377 +Train: [5] [4500/6250] eta: 0:04:36 lr: 0.000125 grad: 0.1507 (0.1809) loss: 0.8874 (0.9115) time: 0.1581 data: 0.0708 max mem: 9377 +Train: [5] [4600/6250] eta: 0:04:20 lr: 0.000125 grad: 0.1869 (0.1812) loss: 0.8915 (0.9111) time: 0.1412 data: 0.0496 max mem: 9377 +Train: [5] [4700/6250] eta: 0:04:04 lr: 0.000125 grad: 0.2005 (0.1813) loss: 0.8912 (0.9106) time: 0.1510 data: 0.0635 max mem: 9377 +Train: [5] [4800/6250] eta: 0:03:48 lr: 0.000125 grad: 0.1773 (0.1812) loss: 0.8890 (0.9102) time: 0.1524 data: 0.0640 max mem: 9377 +Train: [5] [4900/6250] eta: 0:03:32 lr: 0.000125 grad: 0.1697 (0.1815) loss: 0.8858 (0.9097) time: 0.1450 data: 0.0609 max mem: 9377 +Train: [5] [5000/6250] eta: 0:03:17 lr: 0.000125 grad: 0.1450 (0.1810) loss: 0.8879 (0.9093) time: 0.1532 data: 0.0715 max mem: 9377 +Train: [5] [5100/6250] eta: 0:03:01 lr: 0.000125 grad: 0.1613 (0.1810) loss: 0.8884 (0.9089) time: 0.1408 data: 0.0535 max mem: 9377 +Train: [5] [5200/6250] eta: 0:02:45 lr: 0.000125 grad: 0.1597 (0.1807) loss: 0.8901 (0.9085) time: 0.1788 data: 0.0851 max mem: 9377 +Train: [5] [5300/6250] eta: 0:02:30 lr: 0.000125 grad: 0.1322 (0.1806) loss: 0.8889 (0.9082) time: 0.2133 data: 0.1355 max mem: 9377 +Train: [5] [5400/6250] eta: 0:02:15 lr: 0.000125 grad: 0.1523 (0.1801) loss: 0.8864 (0.9078) time: 0.1745 data: 0.0809 max mem: 9377 +Train: [5] [5500/6250] eta: 0:01:59 lr: 0.000125 grad: 0.1476 (0.1799) loss: 0.8855 (0.9074) time: 0.1769 data: 0.0942 max mem: 9377 +Train: [5] [5600/6250] eta: 0:01:43 lr: 0.000125 grad: 0.1733 (0.1795) loss: 0.8894 (0.9071) time: 0.1676 data: 0.0882 max mem: 9377 +Train: [5] [5700/6250] eta: 0:01:27 lr: 0.000125 grad: 0.1541 (0.1790) loss: 0.8875 (0.9067) time: 0.1983 data: 0.1157 max mem: 9377 +Train: [5] [5800/6250] eta: 0:01:11 lr: 0.000125 grad: 0.1409 (0.1785) loss: 0.8889 (0.9064) time: 0.1555 data: 0.0609 max mem: 9377 +Train: [5] [5900/6250] eta: 0:00:55 lr: 0.000125 grad: 0.1324 (0.1780) loss: 0.8899 (0.9061) time: 0.1688 data: 0.0768 max mem: 9377 +Train: [5] [6000/6250] eta: 0:00:39 lr: 0.000125 grad: 0.1408 (0.1775) loss: 0.8857 (0.9057) time: 0.1737 data: 0.0801 max mem: 9377 +Train: [5] [6100/6250] eta: 0:00:23 lr: 0.000125 grad: 0.1235 (0.1771) loss: 0.8888 (0.9055) time: 0.1676 data: 0.0741 max mem: 9377 +Train: [5] [6200/6250] eta: 0:00:07 lr: 0.000125 grad: 0.1175 (0.1766) loss: 0.8888 (0.9052) time: 0.1602 data: 0.0760 max mem: 9377 +Train: [5] [6249/6250] eta: 0:00:00 lr: 0.000125 grad: 0.1218 (0.1763) loss: 0.8868 (0.9050) time: 0.1745 data: 0.0907 max mem: 9377 +Train: [5] Total time: 0:16:42 (0.1604 s / it) +Averaged stats: lr: 0.000125 grad: 0.1218 (0.1763) loss: 0.8868 (0.9050) +Eval (hcp-train-subset): [5] [ 0/62] eta: 0:05:36 loss: 0.8859 (0.8859) time: 5.4324 data: 5.3937 max mem: 9377 +Eval (hcp-train-subset): [5] [61/62] eta: 0:00:00 loss: 0.8821 (0.8822) time: 0.1246 data: 0.0994 max mem: 9377 +Eval (hcp-train-subset): [5] Total time: 0:00:14 (0.2305 s / it) +Averaged stats (hcp-train-subset): loss: 0.8821 (0.8822) +Eval (hcp-val): [5] [ 0/62] eta: 0:05:16 loss: 0.8756 (0.8756) time: 5.0988 data: 5.0683 max mem: 9377 +Eval (hcp-val): [5] [61/62] eta: 0:00:00 loss: 0.8796 (0.8804) time: 0.1169 data: 0.0913 max mem: 9377 +Eval (hcp-val): [5] Total time: 0:00:13 (0.2176 s / it) +Averaged stats (hcp-val): loss: 0.8796 (0.8804) +Eval (nsd-val): [5] [ 0/62] eta: 0:05:11 loss: 0.8357 (0.8357) time: 5.0224 data: 4.9916 max mem: 9377 +Eval (nsd-val): [5] [61/62] eta: 0:00:00 loss: 0.8463 (0.8466) time: 0.1149 data: 0.0880 max mem: 9377 +Eval (nsd-val): [5] Total time: 0:00:13 (0.2122 s / it) +Averaged stats (nsd-val): loss: 0.8463 (0.8466) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +Train: [6] [ 0/6250] eta: 9:56:43 lr: 0.000125 grad: 0.1037 (0.1037) loss: 0.8999 (0.8999) time: 5.7286 data: 5.5878 max mem: 9377 +Train: [6] [ 100/6250] eta: 0:20:17 lr: 0.000125 grad: 0.1380 (0.2060) loss: 0.8731 (0.8805) time: 0.1602 data: 0.0701 max mem: 9377 +Train: [6] [ 200/6250] eta: 0:17:52 lr: 0.000125 grad: 0.1141 (0.1739) loss: 0.8757 (0.8795) time: 0.1582 data: 0.0776 max mem: 9377 +Train: [6] [ 300/6250] eta: 0:17:06 lr: 0.000125 grad: 0.1308 (0.1648) loss: 0.8849 (0.8806) time: 0.1705 data: 0.0854 max mem: 9377 +Train: [6] [ 400/6250] eta: 0:16:09 lr: 0.000125 grad: 0.1326 (0.1573) loss: 0.8835 (0.8818) time: 0.1481 data: 0.0646 max mem: 9377 +Train: [6] [ 500/6250] eta: 0:15:26 lr: 0.000125 grad: 0.1201 (0.1510) loss: 0.8817 (0.8822) time: 0.1445 data: 0.0629 max mem: 9377 +Train: [6] [ 600/6250] eta: 0:15:10 lr: 0.000125 grad: 0.1250 (0.1485) loss: 0.8878 (0.8826) time: 0.1772 data: 0.0947 max mem: 9377 +Train: [6] [ 700/6250] eta: 0:14:56 lr: 0.000125 grad: 0.1070 (0.1456) loss: 0.8853 (0.8829) time: 0.1663 data: 0.0744 max mem: 9377 +Train: [6] [ 800/6250] eta: 0:14:42 lr: 0.000125 grad: 0.1301 (0.1438) loss: 0.8844 (0.8830) time: 0.1614 data: 0.0654 max mem: 9377 +Train: [6] [ 900/6250] eta: 0:14:25 lr: 0.000125 grad: 0.1257 (0.1414) loss: 0.8858 (0.8830) time: 0.1802 data: 0.0952 max mem: 9377 +Train: [6] [1000/6250] eta: 0:14:11 lr: 0.000125 grad: 0.1177 (0.1393) loss: 0.8803 (0.8829) time: 0.1432 data: 0.0493 max mem: 9377 +Train: [6] [1100/6250] eta: 0:13:48 lr: 0.000125 grad: 0.1090 (0.1375) loss: 0.8825 (0.8828) time: 0.1211 data: 0.0376 max mem: 9377 +Train: [6] [1200/6250] eta: 0:13:26 lr: 0.000125 grad: 0.1107 (0.1369) loss: 0.8794 (0.8827) time: 0.1543 data: 0.0686 max mem: 9377 +Train: [6] [1300/6250] eta: 0:13:13 lr: 0.000125 grad: 0.1177 (0.1360) loss: 0.8804 (0.8825) time: 0.1955 data: 0.1153 max mem: 9377 +Train: [6] [1400/6250] eta: 0:12:59 lr: 0.000125 grad: 0.1023 (0.1350) loss: 0.8831 (0.8823) time: 0.2034 data: 0.1304 max mem: 9377 +Train: [6] [1500/6250] eta: 0:12:41 lr: 0.000125 grad: 0.0984 (0.1340) loss: 0.8767 (0.8821) time: 0.1545 data: 0.0704 max mem: 9377 +Train: [6] [1600/6250] eta: 0:12:25 lr: 0.000125 grad: 0.1128 (0.1330) loss: 0.8803 (0.8820) time: 0.1556 data: 0.0724 max mem: 9377 +Train: [6] [1700/6250] eta: 0:12:09 lr: 0.000125 grad: 0.1457 (0.1328) loss: 0.8806 (0.8819) time: 0.1565 data: 0.0734 max mem: 9377 +Train: [6] [1800/6250] eta: 0:11:54 lr: 0.000125 grad: 0.1111 (0.1320) loss: 0.8771 (0.8817) time: 0.1528 data: 0.0676 max mem: 9377 +Train: [6] [1900/6250] eta: 0:11:39 lr: 0.000125 grad: 0.1023 (0.1309) loss: 0.8751 (0.8816) time: 0.1489 data: 0.0651 max mem: 9377 +Train: [6] [2000/6250] eta: 0:11:21 lr: 0.000125 grad: 0.1216 (0.1303) loss: 0.8754 (0.8814) time: 0.1392 data: 0.0537 max mem: 9377 +Train: [6] [2100/6250] eta: 0:11:03 lr: 0.000125 grad: 0.1140 (0.1292) loss: 0.8768 (0.8813) time: 0.1369 data: 0.0565 max mem: 9377 +Train: [6] [2200/6250] eta: 0:10:45 lr: 0.000125 grad: 0.1060 (0.1284) loss: 0.8791 (0.8812) time: 0.1586 data: 0.0679 max mem: 9377 +Train: [6] [2300/6250] eta: 0:10:29 lr: 0.000125 grad: 0.1063 (0.1279) loss: 0.8753 (0.8811) time: 0.1418 data: 0.0535 max mem: 9377 +Train: [6] [2400/6250] eta: 0:10:14 lr: 0.000125 grad: 0.1048 (0.1271) loss: 0.8775 (0.8810) time: 0.1546 data: 0.0720 max mem: 9377 +Train: [6] [2500/6250] eta: 0:09:58 lr: 0.000125 grad: 0.1076 (0.1264) loss: 0.8783 (0.8808) time: 0.1726 data: 0.0902 max mem: 9377 +Train: [6] [2600/6250] eta: 0:09:42 lr: 0.000125 grad: 0.1112 (0.1260) loss: 0.8777 (0.8807) time: 0.1593 data: 0.0768 max mem: 9377 +Train: [6] [2700/6250] eta: 0:09:26 lr: 0.000125 grad: 0.1177 (0.1253) loss: 0.8787 (0.8807) time: 0.1413 data: 0.0596 max mem: 9377 +Train: [6] [2800/6250] eta: 0:09:10 lr: 0.000125 grad: 0.1008 (0.1247) loss: 0.8751 (0.8805) time: 0.1596 data: 0.0782 max mem: 9377 +Train: [6] [2900/6250] eta: 0:08:53 lr: 0.000125 grad: 0.1071 (0.1241) loss: 0.8765 (0.8804) time: 0.1418 data: 0.0499 max mem: 9377 +Train: [6] [3000/6250] eta: 0:08:36 lr: 0.000125 grad: 0.1006 (0.1237) loss: 0.8760 (0.8804) time: 0.1746 data: 0.0931 max mem: 9377 +Train: [6] [3100/6250] eta: 0:08:20 lr: 0.000125 grad: 0.1057 (0.1232) loss: 0.8759 (0.8803) time: 0.1538 data: 0.0702 max mem: 9377 +Train: [6] [3200/6250] eta: 0:08:05 lr: 0.000125 grad: 0.1022 (0.1226) loss: 0.8787 (0.8802) time: 0.1573 data: 0.0649 max mem: 9377 +Train: [6] [3300/6250] eta: 0:07:50 lr: 0.000125 grad: 0.0971 (0.1220) loss: 0.8736 (0.8801) time: 0.1700 data: 0.0795 max mem: 9377 +Train: [6] [3400/6250] eta: 0:07:34 lr: 0.000125 grad: 0.0986 (0.1216) loss: 0.8732 (0.8799) time: 0.1378 data: 0.0484 max mem: 9377 +Train: [6] [3500/6250] eta: 0:07:18 lr: 0.000125 grad: 0.0949 (0.1211) loss: 0.8774 (0.8798) time: 0.1867 data: 0.1036 max mem: 9377 +Train: [6] [3600/6250] eta: 0:07:01 lr: 0.000125 grad: 0.0992 (0.1208) loss: 0.8766 (0.8797) time: 0.1459 data: 0.0621 max mem: 9377 +Train: [6] [3700/6250] eta: 0:06:44 lr: 0.000125 grad: 0.0993 (0.1204) loss: 0.8765 (0.8796) time: 0.1543 data: 0.0663 max mem: 9377 +Train: [6] [3800/6250] eta: 0:06:28 lr: 0.000125 grad: 0.1072 (0.1201) loss: 0.8727 (0.8794) time: 0.1443 data: 0.0527 max mem: 9377 +Train: [6] [3900/6250] eta: 0:06:11 lr: 0.000125 grad: 0.0982 (0.1196) loss: 0.8764 (0.8793) time: 0.1564 data: 0.0765 max mem: 9377 +Train: [6] [4000/6250] eta: 0:05:55 lr: 0.000125 grad: 0.0995 (0.1192) loss: 0.8720 (0.8791) time: 0.1448 data: 0.0651 max mem: 9377 +Train: [6] [4100/6250] eta: 0:05:39 lr: 0.000125 grad: 0.0929 (0.1188) loss: 0.8741 (0.8790) time: 0.1479 data: 0.0675 max mem: 9377 +Train: [6] [4200/6250] eta: 0:05:23 lr: 0.000125 grad: 0.0918 (0.1187) loss: 0.8718 (0.8788) time: 0.1597 data: 0.0745 max mem: 9377 +Train: [6] [4300/6250] eta: 0:05:08 lr: 0.000125 grad: 0.0936 (0.1183) loss: 0.8729 (0.8786) time: 0.1562 data: 0.0706 max mem: 9377 +Train: [6] [4400/6250] eta: 0:04:52 lr: 0.000125 grad: 0.1042 (0.1179) loss: 0.8737 (0.8785) time: 0.1491 data: 0.0732 max mem: 9377 +Train: [6] [4500/6250] eta: 0:04:35 lr: 0.000125 grad: 0.1026 (0.1176) loss: 0.8763 (0.8784) time: 0.1399 data: 0.0623 max mem: 9377 +Train: [6] [4600/6250] eta: 0:04:20 lr: 0.000125 grad: 0.0840 (0.1172) loss: 0.8733 (0.8783) time: 0.1801 data: 0.0928 max mem: 9377 +Train: [6] [4700/6250] eta: 0:04:04 lr: 0.000125 grad: 0.0952 (0.1169) loss: 0.8708 (0.8781) time: 0.1536 data: 0.0704 max mem: 9377 +Train: [6] [4800/6250] eta: 0:03:48 lr: 0.000125 grad: 0.0860 (0.1165) loss: 0.8763 (0.8780) time: 0.1517 data: 0.0637 max mem: 9377 +Train: [6] [4900/6250] eta: 0:03:33 lr: 0.000125 grad: 0.1052 (0.1163) loss: 0.8722 (0.8779) time: 0.1835 data: 0.1009 max mem: 9377 +Train: [6] [5000/6250] eta: 0:03:18 lr: 0.000125 grad: 0.0954 (0.1159) loss: 0.8738 (0.8778) time: 0.1833 data: 0.0944 max mem: 9377 +Train: [6] [5100/6250] eta: 0:03:03 lr: 0.000125 grad: 0.0907 (0.1155) loss: 0.8718 (0.8777) time: 0.2015 data: 0.1207 max mem: 9377 +Train: [6] [5200/6250] eta: 0:02:47 lr: 0.000125 grad: 0.1057 (0.1151) loss: 0.8726 (0.8776) time: 0.1522 data: 0.0706 max mem: 9377 +Train: [6] [5300/6250] eta: 0:02:31 lr: 0.000125 grad: 0.0895 (0.1147) loss: 0.8714 (0.8774) time: 0.2001 data: 0.1180 max mem: 9377 +Train: [6] [5400/6250] eta: 0:02:15 lr: 0.000125 grad: 0.0828 (0.1143) loss: 0.8669 (0.8773) time: 0.1648 data: 0.0690 max mem: 9377 +Train: [6] [5500/6250] eta: 0:01:59 lr: 0.000125 grad: 0.1052 (0.1141) loss: 0.8726 (0.8772) time: 0.1713 data: 0.0771 max mem: 9377 +Train: [6] [5600/6250] eta: 0:01:43 lr: 0.000125 grad: 0.0949 (0.1138) loss: 0.8667 (0.8771) time: 0.1436 data: 0.0502 max mem: 9377 +Train: [6] [5700/6250] eta: 0:01:28 lr: 0.000125 grad: 0.0914 (0.1135) loss: 0.8643 (0.8769) time: 0.1754 data: 0.0829 max mem: 9377 +Train: [6] [5800/6250] eta: 0:01:12 lr: 0.000125 grad: 0.1026 (0.1132) loss: 0.8661 (0.8768) time: 0.1543 data: 0.0483 max mem: 9377 +Train: [6] [5900/6250] eta: 0:00:56 lr: 0.000125 grad: 0.0822 (0.1129) loss: 0.8732 (0.8767) time: 0.1820 data: 0.0914 max mem: 9377 +Train: [6] [6000/6250] eta: 0:00:40 lr: 0.000125 grad: 0.0863 (0.1127) loss: 0.8621 (0.8765) time: 0.1506 data: 0.0654 max mem: 9377 +Train: [6] [6100/6250] eta: 0:00:24 lr: 0.000125 grad: 0.0944 (0.1124) loss: 0.8673 (0.8764) time: 0.1468 data: 0.0554 max mem: 9377 +Train: [6] [6200/6250] eta: 0:00:08 lr: 0.000125 grad: 0.0850 (0.1122) loss: 0.8724 (0.8763) time: 0.1622 data: 0.0736 max mem: 9377 +Train: [6] [6249/6250] eta: 0:00:00 lr: 0.000125 grad: 0.0981 (0.1121) loss: 0.8691 (0.8762) time: 0.1430 data: 0.0507 max mem: 9377 +Train: [6] Total time: 0:16:45 (0.1608 s / it) +Averaged stats: lr: 0.000125 grad: 0.0981 (0.1121) loss: 0.8691 (0.8762) +Eval (hcp-train-subset): [6] [ 0/62] eta: 0:06:04 loss: 0.8648 (0.8648) time: 5.8783 data: 5.8443 max mem: 9377 +Eval (hcp-train-subset): [6] [61/62] eta: 0:00:00 loss: 0.8678 (0.8676) time: 0.1266 data: 0.0998 max mem: 9377 +Eval (hcp-train-subset): [6] Total time: 0:00:14 (0.2416 s / it) +Averaged stats (hcp-train-subset): loss: 0.8678 (0.8676) +Eval (hcp-val): [6] [ 0/62] eta: 0:04:02 loss: 0.8634 (0.8634) time: 3.9126 data: 3.8366 max mem: 9377 +Eval (hcp-val): [6] [61/62] eta: 0:00:00 loss: 0.8642 (0.8657) time: 0.1329 data: 0.1073 max mem: 9377 +Eval (hcp-val): [6] Total time: 0:00:13 (0.2139 s / it) +Averaged stats (hcp-val): loss: 0.8642 (0.8657) +Eval (nsd-val): [6] [ 0/62] eta: 0:05:29 loss: 0.8250 (0.8250) time: 5.3211 data: 5.2917 max mem: 9377 +Eval (nsd-val): [6] [61/62] eta: 0:00:00 loss: 0.8314 (0.8323) time: 0.1414 data: 0.1138 max mem: 9377 +Eval (nsd-val): [6] Total time: 0:00:12 (0.2086 s / it) +Averaged stats (nsd-val): loss: 0.8314 (0.8323) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +Train: [7] [ 0/6250] eta: 7:12:23 lr: 0.000125 grad: 0.0945 (0.0945) loss: 0.8898 (0.8898) time: 4.1509 data: 3.9686 max mem: 9377 +Train: [7] [ 100/6250] eta: 0:22:14 lr: 0.000125 grad: 0.0920 (0.1034) loss: 0.8676 (0.8680) time: 0.1731 data: 0.0779 max mem: 9377 +Train: [7] [ 200/6250] eta: 0:19:39 lr: 0.000125 grad: 0.0969 (0.1125) loss: 0.8668 (0.8674) time: 0.1613 data: 0.0685 max mem: 9377 +Train: [7] [ 300/6250] eta: 0:17:56 lr: 0.000125 grad: 0.1143 (0.1095) loss: 0.8662 (0.8665) time: 0.1434 data: 0.0499 max mem: 9377 +Train: [7] [ 400/6250] eta: 0:17:08 lr: 0.000125 grad: 0.0980 (0.1068) loss: 0.8723 (0.8672) time: 0.1557 data: 0.0572 max mem: 9377 +Train: [7] [ 500/6250] eta: 0:16:25 lr: 0.000125 grad: 0.0886 (0.1040) loss: 0.8642 (0.8676) time: 0.1562 data: 0.0561 max mem: 9377 +Train: [7] [ 600/6250] eta: 0:15:57 lr: 0.000125 grad: 0.0851 (0.1020) loss: 0.8720 (0.8684) time: 0.1875 data: 0.1092 max mem: 9377 +Train: [7] [ 700/6250] eta: 0:15:32 lr: 0.000125 grad: 0.0846 (0.1002) loss: 0.8694 (0.8685) time: 0.1707 data: 0.0922 max mem: 9377 +Train: [7] [ 800/6250] eta: 0:15:10 lr: 0.000125 grad: 0.0801 (0.0998) loss: 0.8708 (0.8685) time: 0.1373 data: 0.0516 max mem: 9377 +Train: [7] [ 900/6250] eta: 0:14:44 lr: 0.000125 grad: 0.0845 (0.0986) loss: 0.8675 (0.8682) time: 0.1669 data: 0.0876 max mem: 9377 +Train: [7] [1000/6250] eta: 0:14:21 lr: 0.000125 grad: 0.0870 (0.0978) loss: 0.8699 (0.8683) time: 0.1446 data: 0.0582 max mem: 9377 +Train: [7] [1100/6250] eta: 0:14:04 lr: 0.000125 grad: 0.0837 (0.0981) loss: 0.8668 (0.8682) time: 0.1685 data: 0.0853 max mem: 9377 +Train: [7] [1200/6250] eta: 0:13:45 lr: 0.000125 grad: 0.0866 (0.0973) loss: 0.8671 (0.8680) time: 0.1734 data: 0.0875 max mem: 9377 +Train: [7] [1300/6250] eta: 0:13:26 lr: 0.000125 grad: 0.0821 (0.0968) loss: 0.8665 (0.8679) time: 0.1713 data: 0.0909 max mem: 9377 +Train: [7] [1400/6250] eta: 0:13:07 lr: 0.000125 grad: 0.1066 (0.0965) loss: 0.8665 (0.8678) time: 0.1797 data: 0.1004 max mem: 9377 +Train: [7] [1500/6250] eta: 0:12:48 lr: 0.000125 grad: 0.0861 (0.0964) loss: 0.8661 (0.8676) time: 0.1498 data: 0.0661 max mem: 9377 +Train: [7] [1600/6250] eta: 0:12:30 lr: 0.000125 grad: 0.0853 (0.0964) loss: 0.8673 (0.8676) time: 0.1617 data: 0.0730 max mem: 9377 +Train: [7] [1700/6250] eta: 0:12:12 lr: 0.000125 grad: 0.0838 (0.0961) loss: 0.8674 (0.8675) time: 0.1603 data: 0.0707 max mem: 9377 +Train: [7] [1800/6250] eta: 0:11:54 lr: 0.000125 grad: 0.0777 (0.0955) loss: 0.8651 (0.8675) time: 0.1473 data: 0.0540 max mem: 9377 +Train: [7] [1900/6250] eta: 0:11:37 lr: 0.000125 grad: 0.0785 (0.0952) loss: 0.8674 (0.8674) time: 0.1617 data: 0.0799 max mem: 9377 +Train: [7] [2000/6250] eta: 0:11:23 lr: 0.000125 grad: 0.0804 (0.0948) loss: 0.8609 (0.8672) time: 0.1785 data: 0.0942 max mem: 9377 +Train: [7] [2100/6250] eta: 0:11:07 lr: 0.000125 grad: 0.0868 (0.0947) loss: 0.8624 (0.8671) time: 0.1430 data: 0.0612 max mem: 9377 +Train: [7] [2200/6250] eta: 0:10:53 lr: 0.000125 grad: 0.0780 (0.0943) loss: 0.8676 (0.8670) time: 0.1706 data: 0.0796 max mem: 9377 +Train: [7] [2300/6250] eta: 0:10:35 lr: 0.000125 grad: 0.0788 (0.0943) loss: 0.8628 (0.8669) time: 0.1456 data: 0.0608 max mem: 9377 +Train: [7] [2400/6250] eta: 0:10:16 lr: 0.000125 grad: 0.0754 (0.0938) loss: 0.8677 (0.8668) time: 0.1294 data: 0.0424 max mem: 9377 +Train: [7] [2500/6250] eta: 0:09:59 lr: 0.000125 grad: 0.0879 (0.0939) loss: 0.8639 (0.8668) time: 0.1424 data: 0.0491 max mem: 9377 +Train: [7] [2600/6250] eta: 0:09:41 lr: 0.000125 grad: 0.0859 (0.0937) loss: 0.8681 (0.8667) time: 0.1469 data: 0.0700 max mem: 9377 +Train: [7] [2700/6250] eta: 0:09:25 lr: 0.000125 grad: 0.0903 (0.0933) loss: 0.8657 (0.8667) time: 0.1607 data: 0.0704 max mem: 9377 +Train: [7] [2800/6250] eta: 0:09:09 lr: 0.000125 grad: 0.0759 (0.0932) loss: 0.8611 (0.8667) time: 0.1483 data: 0.0684 max mem: 9377 +Train: [7] [2900/6250] eta: 0:08:51 lr: 0.000125 grad: 0.0829 (0.0930) loss: 0.8602 (0.8666) time: 0.1431 data: 0.0631 max mem: 9377 +Train: [7] [3000/6250] eta: 0:08:34 lr: 0.000125 grad: 0.0831 (0.0927) loss: 0.8625 (0.8665) time: 0.1452 data: 0.0634 max mem: 9377 +Train: [7] [3100/6250] eta: 0:08:17 lr: 0.000125 grad: 0.1133 (0.0931) loss: 0.8656 (0.8664) time: 0.1390 data: 0.0601 max mem: 9377 +Train: [7] [3200/6250] eta: 0:08:00 lr: 0.000125 grad: 0.0750 (0.0932) loss: 0.8616 (0.8663) time: 0.1459 data: 0.0608 max mem: 9377 +Train: [7] [3300/6250] eta: 0:07:43 lr: 0.000125 grad: 0.0846 (0.0929) loss: 0.8626 (0.8663) time: 0.1079 data: 0.0211 max mem: 9377 +Train: [7] [3400/6250] eta: 0:07:28 lr: 0.000125 grad: 0.0759 (0.0929) loss: 0.8616 (0.8662) time: 0.1812 data: 0.0849 max mem: 9377 +Train: [7] [3500/6250] eta: 0:07:13 lr: 0.000125 grad: 0.0889 (0.0926) loss: 0.8613 (0.8661) time: 0.1738 data: 0.0850 max mem: 9377 +Train: [7] [3600/6250] eta: 0:06:58 lr: 0.000125 grad: 0.0783 (0.0927) loss: 0.8600 (0.8661) time: 0.1591 data: 0.0751 max mem: 9377 +Train: [7] [3700/6250] eta: 0:06:43 lr: 0.000125 grad: 0.0737 (0.0926) loss: 0.8588 (0.8660) time: 0.1598 data: 0.0686 max mem: 9377 +Train: [7] [3800/6250] eta: 0:06:27 lr: 0.000125 grad: 0.0835 (0.0924) loss: 0.8626 (0.8659) time: 0.1655 data: 0.0778 max mem: 9377 +Train: [7] [3900/6250] eta: 0:06:11 lr: 0.000125 grad: 0.0837 (0.0922) loss: 0.8612 (0.8658) time: 0.1526 data: 0.0560 max mem: 9377 +Train: [7] [4000/6250] eta: 0:05:55 lr: 0.000125 grad: 0.0784 (0.0919) loss: 0.8623 (0.8657) time: 0.1515 data: 0.0622 max mem: 9377 +Train: [7] [4100/6250] eta: 0:05:39 lr: 0.000125 grad: 0.0716 (0.0917) loss: 0.8639 (0.8656) time: 0.1615 data: 0.0771 max mem: 9377 +Train: [7] [4200/6250] eta: 0:05:23 lr: 0.000125 grad: 0.0802 (0.0914) loss: 0.8659 (0.8656) time: 0.1453 data: 0.0628 max mem: 9377 +Train: [7] [4300/6250] eta: 0:05:07 lr: 0.000125 grad: 0.0799 (0.0911) loss: 0.8546 (0.8655) time: 0.1394 data: 0.0592 max mem: 9377 +Train: [7] [4400/6250] eta: 0:04:52 lr: 0.000125 grad: 0.0720 (0.0910) loss: 0.8638 (0.8654) time: 0.1733 data: 0.0819 max mem: 9377 +Train: [7] [4500/6250] eta: 0:04:36 lr: 0.000125 grad: 0.0733 (0.0907) loss: 0.8614 (0.8654) time: 0.1519 data: 0.0668 max mem: 9377 +Train: [7] [4600/6250] eta: 0:04:20 lr: 0.000125 grad: 0.0734 (0.0904) loss: 0.8642 (0.8653) time: 0.1431 data: 0.0596 max mem: 9377 +Train: [7] [4700/6250] eta: 0:04:05 lr: 0.000125 grad: 0.0792 (0.0903) loss: 0.8628 (0.8652) time: 0.1833 data: 0.0954 max mem: 9377 +Train: [7] [4800/6250] eta: 0:03:49 lr: 0.000125 grad: 0.0786 (0.0902) loss: 0.8605 (0.8652) time: 0.1899 data: 0.1118 max mem: 9377 +Train: [7] [4900/6250] eta: 0:03:33 lr: 0.000125 grad: 0.0763 (0.0900) loss: 0.8574 (0.8651) time: 0.1575 data: 0.0771 max mem: 9377 +Train: [7] [5000/6250] eta: 0:03:18 lr: 0.000125 grad: 0.0713 (0.0897) loss: 0.8631 (0.8650) time: 0.1728 data: 0.0961 max mem: 9377 +Train: [7] [5100/6250] eta: 0:03:02 lr: 0.000125 grad: 0.0816 (0.0895) loss: 0.8565 (0.8650) time: 0.1363 data: 0.0518 max mem: 9377 +Train: [7] [5200/6250] eta: 0:02:46 lr: 0.000125 grad: 0.0811 (0.0895) loss: 0.8619 (0.8649) time: 0.1369 data: 0.0446 max mem: 9377 +Train: [7] [5300/6250] eta: 0:02:31 lr: 0.000125 grad: 0.0793 (0.0894) loss: 0.8622 (0.8649) time: 0.1680 data: 0.0791 max mem: 9377 +Train: [7] [5400/6250] eta: 0:02:15 lr: 0.000125 grad: 0.0765 (0.0892) loss: 0.8603 (0.8648) time: 0.1606 data: 0.0826 max mem: 9377 +Train: [7] [5500/6250] eta: 0:01:59 lr: 0.000125 grad: 0.0800 (0.0890) loss: 0.8593 (0.8647) time: 0.1498 data: 0.0574 max mem: 9377 +Train: [7] [5600/6250] eta: 0:01:43 lr: 0.000125 grad: 0.0735 (0.0888) loss: 0.8591 (0.8647) time: 0.1560 data: 0.0697 max mem: 9377 +Train: [7] [5700/6250] eta: 0:01:27 lr: 0.000125 grad: 0.0729 (0.0885) loss: 0.8625 (0.8646) time: 0.1571 data: 0.0668 max mem: 9377 +Train: [7] [5800/6250] eta: 0:01:11 lr: 0.000125 grad: 0.0764 (0.0884) loss: 0.8601 (0.8646) time: 0.1562 data: 0.0733 max mem: 9377 +Train: [7] [5900/6250] eta: 0:00:55 lr: 0.000125 grad: 0.0767 (0.0882) loss: 0.8602 (0.8645) time: 0.1574 data: 0.0704 max mem: 9377 +Train: [7] [6000/6250] eta: 0:00:39 lr: 0.000125 grad: 0.0743 (0.0881) loss: 0.8589 (0.8645) time: 0.1500 data: 0.0673 max mem: 9377 +Train: [7] [6100/6250] eta: 0:00:23 lr: 0.000125 grad: 0.0815 (0.0880) loss: 0.8578 (0.8644) time: 0.1587 data: 0.0766 max mem: 9377 +Train: [7] [6200/6250] eta: 0:00:07 lr: 0.000125 grad: 0.0791 (0.0879) loss: 0.8619 (0.8644) time: 0.1946 data: 0.1109 max mem: 9377 +Train: [7] [6249/6250] eta: 0:00:00 lr: 0.000125 grad: 0.0797 (0.0878) loss: 0.8646 (0.8644) time: 0.1523 data: 0.0744 max mem: 9377 +Train: [7] Total time: 0:16:40 (0.1601 s / it) +Averaged stats: lr: 0.000125 grad: 0.0797 (0.0878) loss: 0.8646 (0.8644) +Eval (hcp-train-subset): [7] [ 0/62] eta: 0:04:06 loss: 0.8566 (0.8566) time: 3.9718 data: 3.8769 max mem: 9377 +Eval (hcp-train-subset): [7] [61/62] eta: 0:00:00 loss: 0.8594 (0.8597) time: 0.1047 data: 0.0758 max mem: 9377 +Eval (hcp-train-subset): [7] Total time: 0:00:14 (0.2373 s / it) +Averaged stats (hcp-train-subset): loss: 0.8594 (0.8597) +Eval (hcp-val): [7] [ 0/62] eta: 0:04:56 loss: 0.8556 (0.8556) time: 4.7838 data: 4.7536 max mem: 9377 +Eval (hcp-val): [7] [61/62] eta: 0:00:00 loss: 0.8581 (0.8586) time: 0.1319 data: 0.1048 max mem: 9377 +Eval (hcp-val): [7] Total time: 0:00:13 (0.2132 s / it) +Averaged stats (hcp-val): loss: 0.8581 (0.8586) +Eval (nsd-val): [7] [ 0/62] eta: 0:04:42 loss: 0.8162 (0.8162) time: 4.5615 data: 4.5303 max mem: 9377 +Eval (nsd-val): [7] [61/62] eta: 0:00:00 loss: 0.8247 (0.8259) time: 0.1266 data: 0.1012 max mem: 9377 +Eval (nsd-val): [7] Total time: 0:00:12 (0.2053 s / it) +Averaged stats (nsd-val): loss: 0.8247 (0.8259) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +Train: [8] [ 0/6250] eta: 9:21:12 lr: 0.000125 grad: 0.0722 (0.0722) loss: 0.8965 (0.8965) time: 5.3876 data: 5.0407 max mem: 9377 +Train: [8] [ 100/6250] eta: 0:24:15 lr: 0.000125 grad: 0.0854 (0.0929) loss: 0.8611 (0.8707) time: 0.1670 data: 0.0709 max mem: 9377 +Train: [8] [ 200/6250] eta: 0:20:20 lr: 0.000125 grad: 0.0764 (0.0910) loss: 0.8555 (0.8644) time: 0.1456 data: 0.0646 max mem: 9377 +Train: [8] [ 300/6250] eta: 0:18:40 lr: 0.000125 grad: 0.0792 (0.0900) loss: 0.8565 (0.8609) time: 0.1723 data: 0.0802 max mem: 9377 +Train: [8] [ 400/6250] eta: 0:17:53 lr: 0.000125 grad: 0.0774 (0.0883) loss: 0.8605 (0.8599) time: 0.1694 data: 0.0738 max mem: 9377 +Train: [8] [ 500/6250] eta: 0:17:09 lr: 0.000125 grad: 0.0806 (0.0880) loss: 0.8575 (0.8591) time: 0.1470 data: 0.0489 max mem: 9377 +Train: [8] [ 600/6250] eta: 0:16:27 lr: 0.000125 grad: 0.0727 (0.0864) loss: 0.8616 (0.8584) time: 0.1430 data: 0.0522 max mem: 9377 +Train: [8] [ 700/6250] eta: 0:15:57 lr: 0.000125 grad: 0.0765 (0.0862) loss: 0.8536 (0.8581) time: 0.1583 data: 0.0692 max mem: 9377 +Train: [8] [ 800/6250] eta: 0:15:33 lr: 0.000125 grad: 0.0831 (0.0857) loss: 0.8565 (0.8579) time: 0.1810 data: 0.0983 max mem: 9377 +Train: [8] [ 900/6250] eta: 0:15:07 lr: 0.000125 grad: 0.0825 (0.0853) loss: 0.8584 (0.8575) time: 0.1689 data: 0.0784 max mem: 9377 +Train: [8] [1000/6250] eta: 0:14:39 lr: 0.000125 grad: 0.0702 (0.0855) loss: 0.8640 (0.8574) time: 0.1402 data: 0.0540 max mem: 9377 +Train: [8] [1100/6250] eta: 0:14:15 lr: 0.000125 grad: 0.0780 (0.0849) loss: 0.8545 (0.8573) time: 0.1480 data: 0.0619 max mem: 9377 +Train: [8] [1200/6250] eta: 0:13:52 lr: 0.000125 grad: 0.0751 (0.0843) loss: 0.8579 (0.8572) time: 0.1327 data: 0.0407 max mem: 9377 +Train: [8] [1300/6250] eta: 0:13:29 lr: 0.000125 grad: 0.0747 (0.0839) loss: 0.8531 (0.8570) time: 0.1510 data: 0.0589 max mem: 9377 +Train: [8] [1400/6250] eta: 0:13:07 lr: 0.000125 grad: 0.0745 (0.0838) loss: 0.8617 (0.8569) time: 0.1315 data: 0.0431 max mem: 9377 +Train: [8] [1500/6250] eta: 0:12:46 lr: 0.000125 grad: 0.0717 (0.0841) loss: 0.8554 (0.8568) time: 0.1240 data: 0.0330 max mem: 9377 +Train: [8] [1600/6250] eta: 0:12:26 lr: 0.000125 grad: 0.0730 (0.0837) loss: 0.8557 (0.8567) time: 0.1472 data: 0.0514 max mem: 9377 +Train: [8] [1700/6250] eta: 0:12:09 lr: 0.000125 grad: 0.0756 (0.0833) loss: 0.8574 (0.8567) time: 0.1834 data: 0.0978 max mem: 9377 +Train: [8] [1800/6250] eta: 0:11:47 lr: 0.000125 grad: 0.0796 (0.0835) loss: 0.8564 (0.8567) time: 0.1280 data: 0.0459 max mem: 9377 +Train: [8] [1900/6250] eta: 0:11:27 lr: 0.000125 grad: 0.0729 (0.0832) loss: 0.8609 (0.8568) time: 0.1315 data: 0.0418 max mem: 9377 +Train: [8] [2000/6250] eta: 0:11:11 lr: 0.000125 grad: 0.0750 (0.0833) loss: 0.8529 (0.8567) time: 0.1742 data: 0.0817 max mem: 9377 +Train: [8] [2100/6250] eta: 0:10:55 lr: 0.000125 grad: 0.0774 (0.0830) loss: 0.8606 (0.8567) time: 0.1345 data: 0.0430 max mem: 9377 +Train: [8] [2200/6250] eta: 0:10:40 lr: 0.000125 grad: 0.0711 (0.0828) loss: 0.8531 (0.8566) time: 0.1574 data: 0.0777 max mem: 9377 +Train: [8] [2300/6250] eta: 0:10:25 lr: 0.000125 grad: 0.0723 (0.0828) loss: 0.8538 (0.8566) time: 0.1758 data: 0.0909 max mem: 9377 +Train: [8] [2400/6250] eta: 0:10:12 lr: 0.000125 grad: 0.0737 (0.0827) loss: 0.8588 (0.8565) time: 0.1756 data: 0.0951 max mem: 9377 +Train: [8] [2500/6250] eta: 0:09:57 lr: 0.000125 grad: 0.0703 (0.0825) loss: 0.8597 (0.8565) time: 0.1641 data: 0.0825 max mem: 9377 +Train: [8] [2600/6250] eta: 0:09:43 lr: 0.000125 grad: 0.0744 (0.0824) loss: 0.8553 (0.8564) time: 0.1581 data: 0.0744 max mem: 9377 +Train: [8] [2700/6250] eta: 0:09:26 lr: 0.000125 grad: 0.0701 (0.0822) loss: 0.8612 (0.8565) time: 0.1514 data: 0.0630 max mem: 9377 +Train: [8] [2800/6250] eta: 0:09:11 lr: 0.000125 grad: 0.0715 (0.0820) loss: 0.8574 (0.8565) time: 0.1455 data: 0.0637 max mem: 9377 +Train: [8] [2900/6250] eta: 0:08:57 lr: 0.000125 grad: 0.0705 (0.0818) loss: 0.8566 (0.8566) time: 0.2116 data: 0.1357 max mem: 9377 +Train: [8] [3000/6250] eta: 0:08:40 lr: 0.000125 grad: 0.0725 (0.0816) loss: 0.8503 (0.8565) time: 0.1700 data: 0.0861 max mem: 9377 +Train: [8] [3100/6250] eta: 0:08:24 lr: 0.000125 grad: 0.0779 (0.0814) loss: 0.8547 (0.8565) time: 0.1526 data: 0.0676 max mem: 9377 +Train: [8] [3200/6250] eta: 0:08:07 lr: 0.000125 grad: 0.0661 (0.0812) loss: 0.8573 (0.8564) time: 0.1428 data: 0.0447 max mem: 9377 +Train: [8] [3300/6250] eta: 0:07:51 lr: 0.000125 grad: 0.0820 (0.0811) loss: 0.8532 (0.8564) time: 0.1636 data: 0.0826 max mem: 9377 +Train: [8] [3400/6250] eta: 0:07:33 lr: 0.000125 grad: 0.0706 (0.0809) loss: 0.8566 (0.8563) time: 0.1329 data: 0.0525 max mem: 9377 +Train: [8] [3500/6250] eta: 0:07:16 lr: 0.000125 grad: 0.0715 (0.0808) loss: 0.8565 (0.8563) time: 0.1437 data: 0.0549 max mem: 9377 +Train: [8] [3600/6250] eta: 0:07:01 lr: 0.000125 grad: 0.0698 (0.0805) loss: 0.8537 (0.8562) time: 0.1616 data: 0.0752 max mem: 9377 +Train: [8] [3700/6250] eta: 0:06:45 lr: 0.000125 grad: 0.0745 (0.0805) loss: 0.8533 (0.8562) time: 0.1699 data: 0.0876 max mem: 9377 +Train: [8] [3800/6250] eta: 0:06:29 lr: 0.000125 grad: 0.0740 (0.0804) loss: 0.8548 (0.8561) time: 0.1248 data: 0.0363 max mem: 9377 +Train: [8] [3900/6250] eta: 0:06:14 lr: 0.000125 grad: 0.0662 (0.0804) loss: 0.8554 (0.8561) time: 0.1915 data: 0.1046 max mem: 9377 +Train: [8] [4000/6250] eta: 0:05:57 lr: 0.000125 grad: 0.0724 (0.0803) loss: 0.8521 (0.8560) time: 0.1487 data: 0.0568 max mem: 9377 +Train: [8] [4100/6250] eta: 0:05:41 lr: 0.000125 grad: 0.0719 (0.0802) loss: 0.8546 (0.8560) time: 0.1518 data: 0.0628 max mem: 9377 +Train: [8] [4200/6250] eta: 0:05:25 lr: 0.000125 grad: 0.0700 (0.0800) loss: 0.8546 (0.8560) time: 0.1420 data: 0.0572 max mem: 9377 +Train: [8] [4300/6250] eta: 0:05:09 lr: 0.000125 grad: 0.0692 (0.0799) loss: 0.8544 (0.8560) time: 0.1660 data: 0.0883 max mem: 9377 +Train: [8] [4400/6250] eta: 0:04:53 lr: 0.000125 grad: 0.0721 (0.0797) loss: 0.8548 (0.8560) time: 0.1444 data: 0.0608 max mem: 9377 +Train: [8] [4500/6250] eta: 0:04:39 lr: 0.000125 grad: 0.0748 (0.0796) loss: 0.8572 (0.8560) time: 0.1675 data: 0.0789 max mem: 9377 +Train: [8] [4600/6250] eta: 0:04:23 lr: 0.000125 grad: 0.0678 (0.0795) loss: 0.8569 (0.8559) time: 0.2145 data: 0.1405 max mem: 9377 +Train: [8] [4700/6250] eta: 0:04:08 lr: 0.000125 grad: 0.0715 (0.0794) loss: 0.8566 (0.8559) time: 0.1606 data: 0.0771 max mem: 9377 +Train: [8] [4800/6250] eta: 0:03:52 lr: 0.000125 grad: 0.0731 (0.0793) loss: 0.8504 (0.8559) time: 0.1804 data: 0.0923 max mem: 9377 +Train: [8] [4900/6250] eta: 0:03:36 lr: 0.000125 grad: 0.0769 (0.0793) loss: 0.8560 (0.8558) time: 0.1749 data: 0.0989 max mem: 9377 +Train: [8] [5000/6250] eta: 0:03:20 lr: 0.000125 grad: 0.0705 (0.0792) loss: 0.8562 (0.8558) time: 0.1836 data: 0.0900 max mem: 9377 +Train: [8] [5100/6250] eta: 0:03:04 lr: 0.000125 grad: 0.0704 (0.0791) loss: 0.8551 (0.8558) time: 0.1791 data: 0.0926 max mem: 9377 +Train: [8] [5200/6250] eta: 0:02:48 lr: 0.000124 grad: 0.0687 (0.0790) loss: 0.8548 (0.8558) time: 0.1697 data: 0.0752 max mem: 9377 +Train: [8] [5300/6250] eta: 0:02:32 lr: 0.000124 grad: 0.0749 (0.0789) loss: 0.8590 (0.8558) time: 0.1326 data: 0.0446 max mem: 9377 +Train: [8] [5400/6250] eta: 0:02:16 lr: 0.000124 grad: 0.0748 (0.0788) loss: 0.8572 (0.8558) time: 0.1568 data: 0.0670 max mem: 9377 +Train: [8] [5500/6250] eta: 0:02:00 lr: 0.000124 grad: 0.0719 (0.0787) loss: 0.8565 (0.8558) time: 0.1880 data: 0.0908 max mem: 9377 +Train: [8] [5600/6250] eta: 0:01:44 lr: 0.000124 grad: 0.0705 (0.0785) loss: 0.8556 (0.8558) time: 0.1484 data: 0.0543 max mem: 9377 +Train: [8] [5700/6250] eta: 0:01:28 lr: 0.000124 grad: 0.0703 (0.0784) loss: 0.8557 (0.8558) time: 0.1647 data: 0.0744 max mem: 9377 +Train: [8] [5800/6250] eta: 0:01:12 lr: 0.000124 grad: 0.0642 (0.0784) loss: 0.8544 (0.8558) time: 0.1568 data: 0.0709 max mem: 9377 +Train: [8] [5900/6250] eta: 0:00:56 lr: 0.000124 grad: 0.0692 (0.0783) loss: 0.8547 (0.8558) time: 0.1827 data: 0.1037 max mem: 9377 +Train: [8] [6000/6250] eta: 0:00:40 lr: 0.000124 grad: 0.0693 (0.0782) loss: 0.8568 (0.8558) time: 0.1690 data: 0.0857 max mem: 9377 +Train: [8] [6100/6250] eta: 0:00:24 lr: 0.000124 grad: 0.0714 (0.0781) loss: 0.8557 (0.8558) time: 0.1514 data: 0.0613 max mem: 9377 +Train: [8] [6200/6250] eta: 0:00:08 lr: 0.000124 grad: 0.0748 (0.0781) loss: 0.8558 (0.8558) time: 0.1545 data: 0.0666 max mem: 9377 +Train: [8] [6249/6250] eta: 0:00:00 lr: 0.000124 grad: 0.0729 (0.0781) loss: 0.8561 (0.8558) time: 0.1537 data: 0.0690 max mem: 9377 +Train: [8] Total time: 0:16:47 (0.1613 s / it) +Averaged stats: lr: 0.000124 grad: 0.0729 (0.0781) loss: 0.8561 (0.8558) +Eval (hcp-train-subset): [8] [ 0/62] eta: 0:06:21 loss: 0.8570 (0.8570) time: 6.1486 data: 6.1160 max mem: 9377 +Eval (hcp-train-subset): [8] [61/62] eta: 0:00:00 loss: 0.8542 (0.8556) time: 0.1297 data: 0.1027 max mem: 9377 +Eval (hcp-train-subset): [8] Total time: 0:00:14 (0.2309 s / it) +Averaged stats (hcp-train-subset): loss: 0.8542 (0.8556) +Eval (hcp-val): [8] [ 0/62] eta: 0:05:10 loss: 0.8540 (0.8540) time: 5.0055 data: 4.9742 max mem: 9377 +Eval (hcp-val): [8] [61/62] eta: 0:00:00 loss: 0.8547 (0.8548) time: 0.1241 data: 0.0989 max mem: 9377 +Eval (hcp-val): [8] Total time: 0:00:13 (0.2145 s / it) +Averaged stats (hcp-val): loss: 0.8547 (0.8548) +Eval (nsd-val): [8] [ 0/62] eta: 0:05:20 loss: 0.8084 (0.8084) time: 5.1720 data: 5.1311 max mem: 9377 +Eval (nsd-val): [8] [61/62] eta: 0:00:00 loss: 0.8198 (0.8206) time: 0.1151 data: 0.0876 max mem: 9377 +Eval (nsd-val): [8] Total time: 0:00:13 (0.2204 s / it) +Averaged stats (nsd-val): loss: 0.8198 (0.8206) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +Train: [9] [ 0/6250] eta: 8:56:01 lr: 0.000124 grad: 0.1040 (0.1040) loss: 0.8682 (0.8682) time: 5.1458 data: 4.8866 max mem: 9377 +Train: [9] [ 100/6250] eta: 0:21:14 lr: 0.000124 grad: 0.0798 (0.1035) loss: 0.8550 (0.8566) time: 0.1699 data: 0.0819 max mem: 9377 +Train: [9] [ 200/6250] eta: 0:18:43 lr: 0.000124 grad: 0.0694 (0.0911) loss: 0.8507 (0.8535) time: 0.1457 data: 0.0548 max mem: 9377 +Train: [9] [ 300/6250] eta: 0:17:32 lr: 0.000124 grad: 0.0675 (0.0857) loss: 0.8523 (0.8537) time: 0.1573 data: 0.0653 max mem: 9377 +Train: [9] [ 400/6250] eta: 0:16:33 lr: 0.000124 grad: 0.0707 (0.0826) loss: 0.8547 (0.8546) time: 0.1482 data: 0.0558 max mem: 9377 +Train: [9] [ 500/6250] eta: 0:16:18 lr: 0.000124 grad: 0.0677 (0.0809) loss: 0.8544 (0.8550) time: 0.1834 data: 0.0822 max mem: 9377 +Train: [9] [ 600/6250] eta: 0:15:56 lr: 0.000124 grad: 0.0649 (0.0797) loss: 0.8518 (0.8552) time: 0.1639 data: 0.0807 max mem: 9377 +Train: [9] [ 700/6250] eta: 0:15:26 lr: 0.000124 grad: 0.0695 (0.0786) loss: 0.8556 (0.8553) time: 0.1583 data: 0.0655 max mem: 9377 +Train: [9] [ 800/6250] eta: 0:15:03 lr: 0.000124 grad: 0.0696 (0.0775) loss: 0.8526 (0.8553) time: 0.1617 data: 0.0701 max mem: 9377 +Train: [9] [ 900/6250] eta: 0:14:46 lr: 0.000124 grad: 0.0647 (0.0767) loss: 0.8561 (0.8553) time: 0.1702 data: 0.0808 max mem: 9377 +Train: [9] [1000/6250] eta: 0:14:22 lr: 0.000124 grad: 0.0661 (0.0760) loss: 0.8565 (0.8555) time: 0.1531 data: 0.0702 max mem: 9377 +Train: [9] [1100/6250] eta: 0:13:57 lr: 0.000124 grad: 0.0680 (0.0756) loss: 0.8557 (0.8555) time: 0.1406 data: 0.0611 max mem: 9377 +Train: [9] [1200/6250] eta: 0:13:34 lr: 0.000124 grad: 0.0664 (0.0750) loss: 0.8580 (0.8556) time: 0.1457 data: 0.0535 max mem: 9377 +Train: [9] [1300/6250] eta: 0:13:15 lr: 0.000124 grad: 0.0681 (0.0746) loss: 0.8513 (0.8554) time: 0.1536 data: 0.0622 max mem: 9377 +Train: [9] [1400/6250] eta: 0:12:51 lr: 0.000124 grad: 0.0648 (0.0741) loss: 0.8533 (0.8554) time: 0.1421 data: 0.0505 max mem: 9377 +Train: [9] [1500/6250] eta: 0:12:30 lr: 0.000124 grad: 0.0682 (0.0738) loss: 0.8555 (0.8553) time: 0.1449 data: 0.0610 max mem: 9377 +Train: [9] [1600/6250] eta: 0:12:12 lr: 0.000124 grad: 0.0657 (0.0736) loss: 0.8549 (0.8552) time: 0.1475 data: 0.0677 max mem: 9377 +Train: [9] [1700/6250] eta: 0:11:53 lr: 0.000124 grad: 0.0732 (0.0736) loss: 0.8492 (0.8551) time: 0.1589 data: 0.0782 max mem: 9377 +Train: [9] [1800/6250] eta: 0:11:35 lr: 0.000124 grad: 0.0697 (0.0734) loss: 0.8570 (0.8550) time: 0.1473 data: 0.0630 max mem: 9377 +Train: [9] [1900/6250] eta: 0:11:20 lr: 0.000124 grad: 0.0705 (0.0732) loss: 0.8528 (0.8550) time: 0.1409 data: 0.0580 max mem: 9377 +Train: [9] [2000/6250] eta: 0:11:03 lr: 0.000124 grad: 0.0661 (0.0729) loss: 0.8556 (0.8549) time: 0.1393 data: 0.0509 max mem: 9377 +Train: [9] [2100/6250] eta: 0:10:47 lr: 0.000124 grad: 0.0719 (0.0730) loss: 0.8558 (0.8549) time: 0.1491 data: 0.0572 max mem: 9377 +Train: [9] [2200/6250] eta: 0:10:32 lr: 0.000124 grad: 0.0659 (0.0729) loss: 0.8528 (0.8549) time: 0.1633 data: 0.0887 max mem: 9377 +Train: [9] [2300/6250] eta: 0:10:14 lr: 0.000124 grad: 0.0761 (0.0729) loss: 0.8547 (0.8549) time: 0.1426 data: 0.0588 max mem: 9377 +Train: [9] [2400/6250] eta: 0:09:57 lr: 0.000124 grad: 0.0668 (0.0728) loss: 0.8600 (0.8550) time: 0.1450 data: 0.0558 max mem: 9377 +Train: [9] [2500/6250] eta: 0:09:42 lr: 0.000124 grad: 0.0627 (0.0727) loss: 0.8613 (0.8551) time: 0.1608 data: 0.0702 max mem: 9377 +Train: [9] [2600/6250] eta: 0:09:27 lr: 0.000124 grad: 0.0699 (0.0725) loss: 0.8561 (0.8551) time: 0.1540 data: 0.0714 max mem: 9377 +Train: [9] [2700/6250] eta: 0:09:12 lr: 0.000124 grad: 0.0643 (0.0724) loss: 0.8561 (0.8552) time: 0.1521 data: 0.0717 max mem: 9377 +Train: [9] [2800/6250] eta: 0:08:57 lr: 0.000124 grad: 0.0671 (0.0722) loss: 0.8573 (0.8552) time: 0.1686 data: 0.0822 max mem: 9377 +Train: [9] [2900/6250] eta: 0:08:41 lr: 0.000124 grad: 0.0654 (0.0720) loss: 0.8549 (0.8552) time: 0.1451 data: 0.0505 max mem: 9377 +Train: [9] [3000/6250] eta: 0:08:26 lr: 0.000124 grad: 0.0651 (0.0719) loss: 0.8565 (0.8552) time: 0.1622 data: 0.0819 max mem: 9377 +Train: [9] [3100/6250] eta: 0:08:10 lr: 0.000124 grad: 0.0627 (0.0717) loss: 0.8531 (0.8553) time: 0.1609 data: 0.0797 max mem: 9377 +Train: [9] [3200/6250] eta: 0:07:54 lr: 0.000124 grad: 0.0745 (0.0717) loss: 0.8551 (0.8553) time: 0.1553 data: 0.0718 max mem: 9377 +Train: [9] [3300/6250] eta: 0:07:38 lr: 0.000124 grad: 0.0677 (0.0716) loss: 0.8568 (0.8553) time: 0.1553 data: 0.0755 max mem: 9377 +Train: [9] [3400/6250] eta: 0:07:23 lr: 0.000124 grad: 0.0710 (0.0716) loss: 0.8559 (0.8553) time: 0.1497 data: 0.0603 max mem: 9377 +Train: [9] [3500/6250] eta: 0:07:07 lr: 0.000124 grad: 0.0721 (0.0716) loss: 0.8573 (0.8553) time: 0.1464 data: 0.0631 max mem: 9377 +Train: [9] [3600/6250] eta: 0:06:51 lr: 0.000124 grad: 0.0646 (0.0716) loss: 0.8548 (0.8553) time: 0.1536 data: 0.0659 max mem: 9377 +Train: [9] [3700/6250] eta: 0:06:36 lr: 0.000124 grad: 0.0709 (0.0714) loss: 0.8602 (0.8554) time: 0.1456 data: 0.0566 max mem: 9377 +Train: [9] [3800/6250] eta: 0:06:20 lr: 0.000124 grad: 0.0670 (0.0713) loss: 0.8562 (0.8555) time: 0.1458 data: 0.0592 max mem: 9377 +Train: [9] [3900/6250] eta: 0:06:04 lr: 0.000124 grad: 0.0681 (0.0713) loss: 0.8547 (0.8554) time: 0.1592 data: 0.0725 max mem: 9377 +Train: [9] [4000/6250] eta: 0:05:49 lr: 0.000124 grad: 0.0659 (0.0712) loss: 0.8541 (0.8554) time: 0.1460 data: 0.0541 max mem: 9377 +Train: [9] [4100/6250] eta: 0:05:35 lr: 0.000124 grad: 0.0678 (0.0712) loss: 0.8510 (0.8554) time: 0.2087 data: 0.1086 max mem: 9377 +Train: [9] [4200/6250] eta: 0:05:20 lr: 0.000124 grad: 0.0634 (0.0713) loss: 0.8572 (0.8554) time: 0.1594 data: 0.0795 max mem: 9377 +Train: [9] [4300/6250] eta: 0:05:05 lr: 0.000124 grad: 0.0629 (0.0712) loss: 0.8536 (0.8554) time: 0.2442 data: 0.1682 max mem: 9377 +Train: [9] [4400/6250] eta: 0:04:51 lr: 0.000124 grad: 0.0657 (0.0712) loss: 0.8552 (0.8555) time: 0.1738 data: 0.0872 max mem: 9377 +Train: [9] [4500/6250] eta: 0:04:37 lr: 0.000124 grad: 0.0672 (0.0711) loss: 0.8509 (0.8555) time: 0.2203 data: 0.1395 max mem: 9377 +Train: [9] [4600/6250] eta: 0:04:22 lr: 0.000124 grad: 0.0626 (0.0710) loss: 0.8549 (0.8554) time: 0.1671 data: 0.0896 max mem: 9377 +Train: [9] [4700/6250] eta: 0:04:06 lr: 0.000124 grad: 0.0652 (0.0709) loss: 0.8551 (0.8554) time: 0.1717 data: 0.0928 max mem: 9377 +Train: [9] [4800/6250] eta: 0:03:51 lr: 0.000124 grad: 0.0658 (0.0709) loss: 0.8571 (0.8554) time: 0.1738 data: 0.0910 max mem: 9377 +Train: [9] [4900/6250] eta: 0:03:35 lr: 0.000124 grad: 0.0654 (0.0709) loss: 0.8552 (0.8553) time: 0.1785 data: 0.0977 max mem: 9377 +Train: [9] [5000/6250] eta: 0:03:20 lr: 0.000124 grad: 0.0605 (0.0708) loss: 0.8524 (0.8553) time: 0.1721 data: 0.0830 max mem: 9377 +Train: [9] [5100/6250] eta: 0:03:04 lr: 0.000124 grad: 0.0648 (0.0708) loss: 0.8501 (0.8552) time: 0.1542 data: 0.0554 max mem: 9377 +Train: [9] [5200/6250] eta: 0:02:48 lr: 0.000124 grad: 0.0669 (0.0707) loss: 0.8522 (0.8552) time: 0.1860 data: 0.0915 max mem: 9377 +Train: [9] [5300/6250] eta: 0:02:33 lr: 0.000124 grad: 0.0703 (0.0707) loss: 0.8544 (0.8552) time: 0.1830 data: 0.0820 max mem: 9377 +Train: [9] [5400/6250] eta: 0:02:17 lr: 0.000124 grad: 0.0651 (0.0706) loss: 0.8539 (0.8551) time: 0.1962 data: 0.1023 max mem: 9377 +Train: [9] [5500/6250] eta: 0:02:01 lr: 0.000124 grad: 0.0683 (0.0706) loss: 0.8482 (0.8551) time: 0.1220 data: 0.0388 max mem: 9377 +Train: [9] [5600/6250] eta: 0:01:44 lr: 0.000124 grad: 0.0693 (0.0705) loss: 0.8503 (0.8550) time: 0.1682 data: 0.0801 max mem: 9377 +Train: [9] [5700/6250] eta: 0:01:28 lr: 0.000124 grad: 0.0670 (0.0705) loss: 0.8551 (0.8549) time: 0.1444 data: 0.0498 max mem: 9377 +Train: [9] [5800/6250] eta: 0:01:12 lr: 0.000124 grad: 0.0697 (0.0706) loss: 0.8493 (0.8549) time: 0.1446 data: 0.0473 max mem: 9377 +Train: [9] [5900/6250] eta: 0:00:56 lr: 0.000124 grad: 0.0677 (0.0706) loss: 0.8504 (0.8548) time: 0.1508 data: 0.0683 max mem: 9377 +Train: [9] [6000/6250] eta: 0:00:40 lr: 0.000124 grad: 0.0673 (0.0705) loss: 0.8504 (0.8548) time: 0.1659 data: 0.0758 max mem: 9377 +Train: [9] [6100/6250] eta: 0:00:24 lr: 0.000124 grad: 0.0657 (0.0705) loss: 0.8491 (0.8547) time: 0.1313 data: 0.0320 max mem: 9377 +Train: [9] [6200/6250] eta: 0:00:08 lr: 0.000124 grad: 0.0676 (0.0705) loss: 0.8480 (0.8546) time: 0.1335 data: 0.0511 max mem: 9377 +Train: [9] [6249/6250] eta: 0:00:00 lr: 0.000124 grad: 0.0629 (0.0705) loss: 0.8506 (0.8546) time: 0.1466 data: 0.0598 max mem: 9377 +Train: [9] Total time: 0:16:49 (0.1615 s / it) +Averaged stats: lr: 0.000124 grad: 0.0629 (0.0705) loss: 0.8506 (0.8546) +Eval (hcp-train-subset): [9] [ 0/62] eta: 0:05:25 loss: 0.8496 (0.8496) time: 5.2496 data: 5.1817 max mem: 9377 +Eval (hcp-train-subset): [9] [61/62] eta: 0:00:00 loss: 0.8532 (0.8527) time: 0.1397 data: 0.1141 max mem: 9377 +Eval (hcp-train-subset): [9] Total time: 0:00:15 (0.2551 s / it) +Averaged stats (hcp-train-subset): loss: 0.8532 (0.8527) +Making plots (hcp-train-subset): example=1 +Eval (hcp-val): [9] [ 0/62] eta: 0:05:05 loss: 0.8493 (0.8493) time: 4.9248 data: 4.8927 max mem: 9377 +Eval (hcp-val): [9] [61/62] eta: 0:00:00 loss: 0.8520 (0.8517) time: 0.1250 data: 0.1001 max mem: 9377 +Eval (hcp-val): [9] Total time: 0:00:13 (0.2128 s / it) +Averaged stats (hcp-val): loss: 0.8520 (0.8517) +Making plots (hcp-val): example=3 +Eval (nsd-val): [9] [ 0/62] eta: 0:04:48 loss: 0.8076 (0.8076) time: 4.6487 data: 4.6171 max mem: 9377 +Eval (nsd-val): [9] [61/62] eta: 0:00:00 loss: 0.8162 (0.8176) time: 0.1408 data: 0.1156 max mem: 9377 +Eval (nsd-val): [9] Total time: 0:00:13 (0.2173 s / it) +Averaged stats (nsd-val): loss: 0.8162 (0.8176) +Making plots (nsd-val): example=35 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-00009.pth +Train: [10] [ 0/6250] eta: 10:47:59 lr: 0.000124 grad: 0.0681 (0.0681) loss: 0.9051 (0.9051) time: 6.2207 data: 6.0877 max mem: 9377 +Train: [10] [ 100/6250] eta: 0:21:13 lr: 0.000124 grad: 0.0678 (0.0753) loss: 0.8482 (0.8601) time: 0.1400 data: 0.0462 max mem: 9377 +Train: [10] [ 200/6250] eta: 0:18:31 lr: 0.000124 grad: 0.0676 (0.0737) loss: 0.8511 (0.8563) time: 0.1563 data: 0.0706 max mem: 9377 +Train: [10] [ 300/6250] eta: 0:17:21 lr: 0.000124 grad: 0.0663 (0.0731) loss: 0.8584 (0.8559) time: 0.1521 data: 0.0695 max mem: 9377 +Train: [10] [ 400/6250] eta: 0:16:29 lr: 0.000124 grad: 0.0661 (0.0724) loss: 0.8528 (0.8554) time: 0.1577 data: 0.0700 max mem: 9377 +Train: [10] [ 500/6250] eta: 0:15:58 lr: 0.000124 grad: 0.0667 (0.0723) loss: 0.8533 (0.8549) time: 0.1598 data: 0.0737 max mem: 9377 +Train: [10] [ 600/6250] eta: 0:15:22 lr: 0.000124 grad: 0.0714 (0.0723) loss: 0.8518 (0.8538) time: 0.1640 data: 0.0776 max mem: 9377 +Train: [10] [ 700/6250] eta: 0:14:48 lr: 0.000124 grad: 0.0687 (0.0721) loss: 0.8480 (0.8531) time: 0.1346 data: 0.0473 max mem: 9377 +Train: [10] [ 800/6250] eta: 0:14:21 lr: 0.000124 grad: 0.0628 (0.0716) loss: 0.8539 (0.8528) time: 0.1290 data: 0.0408 max mem: 9377 +Train: [10] [ 900/6250] eta: 0:14:01 lr: 0.000124 grad: 0.0639 (0.0713) loss: 0.8513 (0.8527) time: 0.1542 data: 0.0681 max mem: 9377 +Train: [10] [1000/6250] eta: 0:13:40 lr: 0.000124 grad: 0.0722 (0.0709) loss: 0.8538 (0.8527) time: 0.1453 data: 0.0606 max mem: 9377 +Train: [10] [1100/6250] eta: 0:13:20 lr: 0.000124 grad: 0.0633 (0.0706) loss: 0.8513 (0.8527) time: 0.1292 data: 0.0390 max mem: 9377 +Train: [10] [1200/6250] eta: 0:12:59 lr: 0.000124 grad: 0.0648 (0.0702) loss: 0.8544 (0.8527) time: 0.1400 data: 0.0562 max mem: 9377 +Train: [10] [1300/6250] eta: 0:12:39 lr: 0.000124 grad: 0.0653 (0.0701) loss: 0.8543 (0.8528) time: 0.1427 data: 0.0605 max mem: 9377 +Train: [10] [1400/6250] eta: 0:12:21 lr: 0.000124 grad: 0.0628 (0.0697) loss: 0.8570 (0.8528) time: 0.1363 data: 0.0510 max mem: 9377 +Train: [10] [1500/6250] eta: 0:12:03 lr: 0.000124 grad: 0.0659 (0.0696) loss: 0.8500 (0.8527) time: 0.1562 data: 0.0742 max mem: 9377 +Train: [10] [1600/6250] eta: 0:11:45 lr: 0.000124 grad: 0.0644 (0.0696) loss: 0.8517 (0.8528) time: 0.1640 data: 0.0847 max mem: 9377 +Train: [10] [1700/6250] eta: 0:11:26 lr: 0.000124 grad: 0.0667 (0.0694) loss: 0.8513 (0.8527) time: 0.1540 data: 0.0689 max mem: 9377 +Train: [10] [1800/6250] eta: 0:11:08 lr: 0.000124 grad: 0.0637 (0.0695) loss: 0.8520 (0.8527) time: 0.1441 data: 0.0595 max mem: 9377 +Train: [10] [1900/6250] eta: 0:10:51 lr: 0.000124 grad: 0.0659 (0.0693) loss: 0.8557 (0.8527) time: 0.1370 data: 0.0484 max mem: 9377 +Train: [10] [2000/6250] eta: 0:10:33 lr: 0.000124 grad: 0.0640 (0.0691) loss: 0.8571 (0.8527) time: 0.1399 data: 0.0624 max mem: 9377 +Train: [10] [2100/6250] eta: 0:10:18 lr: 0.000124 grad: 0.0624 (0.0690) loss: 0.8521 (0.8528) time: 0.1458 data: 0.0625 max mem: 9377 +Train: [10] [2200/6250] eta: 0:10:02 lr: 0.000124 grad: 0.0625 (0.0693) loss: 0.8477 (0.8526) time: 0.1449 data: 0.0583 max mem: 9377 +Train: [10] [2300/6250] eta: 0:09:48 lr: 0.000124 grad: 0.0662 (0.0692) loss: 0.8505 (0.8526) time: 0.1551 data: 0.0633 max mem: 9377 +Train: [10] [2400/6250] eta: 0:09:34 lr: 0.000124 grad: 0.0683 (0.0691) loss: 0.8467 (0.8526) time: 0.1609 data: 0.0830 max mem: 9377 +Train: [10] [2500/6250] eta: 0:09:19 lr: 0.000124 grad: 0.0662 (0.0691) loss: 0.8512 (0.8525) time: 0.1530 data: 0.0780 max mem: 9377 +Train: [10] [2600/6250] eta: 0:09:04 lr: 0.000124 grad: 0.0663 (0.0690) loss: 0.8519 (0.8524) time: 0.1373 data: 0.0564 max mem: 9377 +Train: [10] [2700/6250] eta: 0:08:49 lr: 0.000124 grad: 0.0636 (0.0689) loss: 0.8524 (0.8524) time: 0.1458 data: 0.0664 max mem: 9377 +Train: [10] [2800/6250] eta: 0:08:36 lr: 0.000124 grad: 0.0620 (0.0688) loss: 0.8518 (0.8524) time: 0.1644 data: 0.0814 max mem: 9377 +Train: [10] [2900/6250] eta: 0:08:22 lr: 0.000124 grad: 0.0658 (0.0687) loss: 0.8505 (0.8523) time: 0.1592 data: 0.0767 max mem: 9377 +Train: [10] [3000/6250] eta: 0:08:09 lr: 0.000124 grad: 0.0681 (0.0687) loss: 0.8485 (0.8522) time: 0.1614 data: 0.0873 max mem: 9377 +Train: [10] [3100/6250] eta: 0:07:53 lr: 0.000124 grad: 0.0639 (0.0687) loss: 0.8551 (0.8521) time: 0.1307 data: 0.0543 max mem: 9377 +Train: [10] [3200/6250] eta: 0:07:38 lr: 0.000124 grad: 0.0691 (0.0686) loss: 0.8483 (0.8521) time: 0.1423 data: 0.0580 max mem: 9377 +Train: [10] [3300/6250] eta: 0:07:23 lr: 0.000124 grad: 0.0711 (0.0687) loss: 0.8535 (0.8520) time: 0.1408 data: 0.0624 max mem: 9377 +Train: [10] [3400/6250] eta: 0:07:08 lr: 0.000124 grad: 0.0684 (0.0687) loss: 0.8559 (0.8520) time: 0.1568 data: 0.0747 max mem: 9377 +Train: [10] [3500/6250] eta: 0:06:53 lr: 0.000124 grad: 0.0632 (0.0687) loss: 0.8497 (0.8519) time: 0.1373 data: 0.0537 max mem: 9377 +Train: [10] [3600/6250] eta: 0:06:37 lr: 0.000124 grad: 0.0644 (0.0685) loss: 0.8476 (0.8519) time: 0.1270 data: 0.0481 max mem: 9377 +Train: [10] [3700/6250] eta: 0:06:22 lr: 0.000124 grad: 0.0615 (0.0684) loss: 0.8487 (0.8519) time: 0.1449 data: 0.0583 max mem: 9377 +Train: [10] [3800/6250] eta: 0:06:07 lr: 0.000124 grad: 0.0652 (0.0684) loss: 0.8487 (0.8518) time: 0.1537 data: 0.0676 max mem: 9377 +Train: [10] [3900/6250] eta: 0:05:53 lr: 0.000124 grad: 0.0626 (0.0683) loss: 0.8470 (0.8518) time: 0.1494 data: 0.0730 max mem: 9377 +Train: [10] [4000/6250] eta: 0:05:37 lr: 0.000124 grad: 0.0673 (0.0684) loss: 0.8514 (0.8517) time: 0.1597 data: 0.0804 max mem: 9377 +Train: [10] [4100/6250] eta: 0:05:21 lr: 0.000124 grad: 0.0635 (0.0683) loss: 0.8442 (0.8516) time: 0.1366 data: 0.0560 max mem: 9377 +Train: [10] [4200/6250] eta: 0:05:07 lr: 0.000124 grad: 0.0643 (0.0683) loss: 0.8489 (0.8516) time: 0.1747 data: 0.0900 max mem: 9377 +Train: [10] [4300/6250] eta: 0:04:54 lr: 0.000124 grad: 0.0629 (0.0682) loss: 0.8488 (0.8515) time: 0.2129 data: 0.1166 max mem: 9377 +Train: [10] [4400/6250] eta: 0:04:40 lr: 0.000124 grad: 0.0610 (0.0681) loss: 0.8489 (0.8514) time: 0.1885 data: 0.1087 max mem: 9377 +Train: [10] [4500/6250] eta: 0:04:26 lr: 0.000124 grad: 0.0625 (0.0681) loss: 0.8503 (0.8514) time: 0.1598 data: 0.0846 max mem: 9377 +Train: [10] [4600/6250] eta: 0:04:11 lr: 0.000124 grad: 0.0668 (0.0680) loss: 0.8437 (0.8513) time: 0.1841 data: 0.1083 max mem: 9377 +Train: [10] [4700/6250] eta: 0:03:57 lr: 0.000124 grad: 0.0654 (0.0680) loss: 0.8513 (0.8513) time: 0.1768 data: 0.0927 max mem: 9377 +Train: [10] [4800/6250] eta: 0:03:42 lr: 0.000124 grad: 0.0637 (0.0682) loss: 0.8510 (0.8513) time: 0.1582 data: 0.0719 max mem: 9377 +Train: [10] [4900/6250] eta: 0:03:27 lr: 0.000124 grad: 0.0673 (0.0681) loss: 0.8497 (0.8513) time: 0.1724 data: 0.0851 max mem: 9377 +Train: [10] [5000/6250] eta: 0:03:12 lr: 0.000124 grad: 0.0648 (0.0681) loss: 0.8455 (0.8512) time: 0.1618 data: 0.0647 max mem: 9377 +Train: [10] [5100/6250] eta: 0:02:56 lr: 0.000124 grad: 0.0676 (0.0681) loss: 0.8507 (0.8512) time: 0.1483 data: 0.0526 max mem: 9377 +Train: [10] [5200/6250] eta: 0:02:41 lr: 0.000124 grad: 0.0678 (0.0681) loss: 0.8527 (0.8511) time: 0.1702 data: 0.0819 max mem: 9377 +Train: [10] [5300/6250] eta: 0:02:26 lr: 0.000124 grad: 0.0661 (0.0680) loss: 0.8558 (0.8511) time: 0.1169 data: 0.0282 max mem: 9377 +Train: [10] [5400/6250] eta: 0:02:10 lr: 0.000124 grad: 0.0631 (0.0681) loss: 0.8490 (0.8511) time: 0.1411 data: 0.0552 max mem: 9377 +Train: [10] [5500/6250] eta: 0:01:55 lr: 0.000124 grad: 0.0700 (0.0681) loss: 0.8511 (0.8511) time: 0.1379 data: 0.0501 max mem: 9377 +Train: [10] [5600/6250] eta: 0:01:39 lr: 0.000124 grad: 0.0651 (0.0681) loss: 0.8500 (0.8511) time: 0.1395 data: 0.0591 max mem: 9377 +Train: [10] [5700/6250] eta: 0:01:24 lr: 0.000124 grad: 0.0640 (0.0681) loss: 0.8446 (0.8510) time: 0.1399 data: 0.0576 max mem: 9377 +Train: [10] [5800/6250] eta: 0:01:08 lr: 0.000124 grad: 0.0597 (0.0680) loss: 0.8500 (0.8510) time: 0.1432 data: 0.0603 max mem: 9377 +Train: [10] [5900/6250] eta: 0:00:53 lr: 0.000124 grad: 0.0689 (0.0680) loss: 0.8505 (0.8510) time: 0.1571 data: 0.0713 max mem: 9377 +Train: [10] [6000/6250] eta: 0:00:38 lr: 0.000124 grad: 0.0713 (0.0681) loss: 0.8520 (0.8509) time: 0.1423 data: 0.0623 max mem: 9377 +Train: [10] [6100/6250] eta: 0:00:22 lr: 0.000124 grad: 0.0667 (0.0680) loss: 0.8480 (0.8509) time: 0.1450 data: 0.0593 max mem: 9377 +Train: [10] [6200/6250] eta: 0:00:07 lr: 0.000124 grad: 0.0679 (0.0681) loss: 0.8512 (0.8509) time: 0.1409 data: 0.0574 max mem: 9377 +Train: [10] [6249/6250] eta: 0:00:00 lr: 0.000124 grad: 0.0625 (0.0680) loss: 0.8520 (0.8509) time: 0.1578 data: 0.0762 max mem: 9377 +Train: [10] Total time: 0:15:55 (0.1529 s / it) +Averaged stats: lr: 0.000124 grad: 0.0625 (0.0680) loss: 0.8520 (0.8509) +Eval (hcp-train-subset): [10] [ 0/62] eta: 0:06:14 loss: 0.8490 (0.8490) time: 6.0363 data: 6.0032 max mem: 9377 +Eval (hcp-train-subset): [10] [61/62] eta: 0:00:00 loss: 0.8471 (0.8497) time: 0.1523 data: 0.1269 max mem: 9377 +Eval (hcp-train-subset): [10] Total time: 0:00:14 (0.2357 s / it) +Averaged stats (hcp-train-subset): loss: 0.8471 (0.8497) +Eval (hcp-val): [10] [ 0/62] eta: 0:05:35 loss: 0.8490 (0.8490) time: 5.4184 data: 5.3868 max mem: 9377 +Eval (hcp-val): [10] [61/62] eta: 0:00:00 loss: 0.8485 (0.8489) time: 0.1160 data: 0.0894 max mem: 9377 +Eval (hcp-val): [10] Total time: 0:00:13 (0.2164 s / it) +Averaged stats (hcp-val): loss: 0.8485 (0.8489) +Eval (nsd-val): [10] [ 0/62] eta: 0:04:37 loss: 0.8093 (0.8093) time: 4.4730 data: 4.4412 max mem: 9377 +Eval (nsd-val): [10] [61/62] eta: 0:00:00 loss: 0.8180 (0.8188) time: 0.1295 data: 0.1042 max mem: 9377 +Eval (nsd-val): [10] Total time: 0:00:12 (0.2028 s / it) +Averaged stats (nsd-val): loss: 0.8180 (0.8188) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +Train: [11] [ 0/6250] eta: 9:00:02 lr: 0.000124 grad: 0.0696 (0.0696) loss: 0.8719 (0.8719) time: 5.1845 data: 5.0848 max mem: 9377 +Train: [11] [ 100/6250] eta: 0:20:14 lr: 0.000124 grad: 0.0672 (0.0710) loss: 0.8537 (0.8588) time: 0.1450 data: 0.0624 max mem: 9377 +Train: [11] [ 200/6250] eta: 0:17:46 lr: 0.000124 grad: 0.0665 (0.0704) loss: 0.8448 (0.8540) time: 0.1669 data: 0.0842 max mem: 9377 +Train: [11] [ 300/6250] eta: 0:16:41 lr: 0.000124 grad: 0.0655 (0.0696) loss: 0.8450 (0.8525) time: 0.1373 data: 0.0449 max mem: 9377 +Train: [11] [ 400/6250] eta: 0:16:02 lr: 0.000124 grad: 0.0687 (0.0700) loss: 0.8451 (0.8515) time: 0.1562 data: 0.0692 max mem: 9377 +Train: [11] [ 500/6250] eta: 0:15:27 lr: 0.000124 grad: 0.0628 (0.0707) loss: 0.8425 (0.8503) time: 0.1384 data: 0.0487 max mem: 9377 +Train: [11] [ 600/6250] eta: 0:15:00 lr: 0.000124 grad: 0.0684 (0.0709) loss: 0.8381 (0.8492) time: 0.1587 data: 0.0737 max mem: 9377 +Train: [11] [ 700/6250] eta: 0:14:31 lr: 0.000124 grad: 0.0619 (0.0702) loss: 0.8501 (0.8488) time: 0.1243 data: 0.0349 max mem: 9377 +Train: [11] [ 800/6250] eta: 0:14:02 lr: 0.000124 grad: 0.0616 (0.0698) loss: 0.8443 (0.8486) time: 0.1397 data: 0.0564 max mem: 9377 +Train: [11] [ 900/6250] eta: 0:13:39 lr: 0.000124 grad: 0.0644 (0.0693) loss: 0.8468 (0.8485) time: 0.1347 data: 0.0504 max mem: 9377 +Train: [11] [1000/6250] eta: 0:13:16 lr: 0.000124 grad: 0.0693 (0.0692) loss: 0.8463 (0.8484) time: 0.1288 data: 0.0489 max mem: 9377 +Train: [11] [1100/6250] eta: 0:12:59 lr: 0.000124 grad: 0.0641 (0.0688) loss: 0.8420 (0.8483) time: 0.1509 data: 0.0767 max mem: 9377 +Train: [11] [1200/6250] eta: 0:12:36 lr: 0.000124 grad: 0.0630 (0.0685) loss: 0.8514 (0.8484) time: 0.1243 data: 0.0439 max mem: 9377 +Train: [11] [1300/6250] eta: 0:12:17 lr: 0.000124 grad: 0.0635 (0.0680) loss: 0.8451 (0.8484) time: 0.1407 data: 0.0606 max mem: 9377 +Train: [11] [1400/6250] eta: 0:11:59 lr: 0.000124 grad: 0.0624 (0.0677) loss: 0.8488 (0.8484) time: 0.1207 data: 0.0353 max mem: 9377 +Train: [11] [1500/6250] eta: 0:11:43 lr: 0.000124 grad: 0.0630 (0.0674) loss: 0.8470 (0.8484) time: 0.1501 data: 0.0692 max mem: 9377 +Train: [11] [1600/6250] eta: 0:11:24 lr: 0.000124 grad: 0.0635 (0.0675) loss: 0.8465 (0.8484) time: 0.1225 data: 0.0404 max mem: 9377 +Train: [11] [1700/6250] eta: 0:11:09 lr: 0.000124 grad: 0.0608 (0.0674) loss: 0.8510 (0.8484) time: 0.1296 data: 0.0481 max mem: 9377 +Train: [11] [1800/6250] eta: 0:10:54 lr: 0.000124 grad: 0.0618 (0.0671) loss: 0.8448 (0.8484) time: 0.1452 data: 0.0637 max mem: 9377 +Train: [11] [1900/6250] eta: 0:10:38 lr: 0.000124 grad: 0.0649 (0.0671) loss: 0.8523 (0.8483) time: 0.1457 data: 0.0668 max mem: 9377 +Train: [11] [2000/6250] eta: 0:10:21 lr: 0.000124 grad: 0.0607 (0.0670) loss: 0.8463 (0.8484) time: 0.1356 data: 0.0503 max mem: 9377 +Train: [11] [2100/6250] eta: 0:10:07 lr: 0.000124 grad: 0.0630 (0.0670) loss: 0.8491 (0.8484) time: 0.1425 data: 0.0570 max mem: 9377 +Train: [11] [2200/6250] eta: 0:09:51 lr: 0.000124 grad: 0.0666 (0.0670) loss: 0.8487 (0.8485) time: 0.1449 data: 0.0655 max mem: 9377 +Train: [11] [2300/6250] eta: 0:09:36 lr: 0.000124 grad: 0.0630 (0.0668) loss: 0.8529 (0.8485) time: 0.1391 data: 0.0614 max mem: 9377 +Train: [11] [2400/6250] eta: 0:09:21 lr: 0.000124 grad: 0.0621 (0.0668) loss: 0.8507 (0.8485) time: 0.1491 data: 0.0697 max mem: 9377 +Train: [11] [2500/6250] eta: 0:09:06 lr: 0.000124 grad: 0.0635 (0.0667) loss: 0.8473 (0.8485) time: 0.1469 data: 0.0675 max mem: 9377 +Train: [11] [2600/6250] eta: 0:08:51 lr: 0.000124 grad: 0.0622 (0.0668) loss: 0.8529 (0.8486) time: 0.1520 data: 0.0742 max mem: 9377 +Train: [11] [2700/6250] eta: 0:08:36 lr: 0.000124 grad: 0.0683 (0.0668) loss: 0.8516 (0.8486) time: 0.1343 data: 0.0545 max mem: 9377 +Train: [11] [2800/6250] eta: 0:08:21 lr: 0.000124 grad: 0.0605 (0.0666) loss: 0.8508 (0.8486) time: 0.1369 data: 0.0533 max mem: 9377 +Train: [11] [2900/6250] eta: 0:08:07 lr: 0.000124 grad: 0.0651 (0.0666) loss: 0.8481 (0.8486) time: 0.1554 data: 0.0784 max mem: 9377 +Train: [11] [3000/6250] eta: 0:07:53 lr: 0.000124 grad: 0.0655 (0.0666) loss: 0.8423 (0.8487) time: 0.1510 data: 0.0668 max mem: 9377 +Train: [11] [3100/6250] eta: 0:07:38 lr: 0.000124 grad: 0.0607 (0.0666) loss: 0.8473 (0.8486) time: 0.1243 data: 0.0386 max mem: 9377 +Train: [11] [3200/6250] eta: 0:07:25 lr: 0.000124 grad: 0.0645 (0.0665) loss: 0.8446 (0.8486) time: 0.1427 data: 0.0611 max mem: 9377 +Train: [11] [3300/6250] eta: 0:07:10 lr: 0.000124 grad: 0.0651 (0.0664) loss: 0.8451 (0.8486) time: 0.1445 data: 0.0671 max mem: 9377 +Train: [11] [3400/6250] eta: 0:06:56 lr: 0.000124 grad: 0.0602 (0.0663) loss: 0.8481 (0.8487) time: 0.1552 data: 0.0805 max mem: 9377 +Train: [11] [3500/6250] eta: 0:06:42 lr: 0.000124 grad: 0.0612 (0.0663) loss: 0.8514 (0.8487) time: 0.1461 data: 0.0642 max mem: 9377 +Train: [11] [3600/6250] eta: 0:06:27 lr: 0.000124 grad: 0.0639 (0.0664) loss: 0.8500 (0.8487) time: 0.1193 data: 0.0370 max mem: 9377 +Train: [11] [3700/6250] eta: 0:06:14 lr: 0.000124 grad: 0.0656 (0.0664) loss: 0.8446 (0.8487) time: 0.1486 data: 0.0651 max mem: 9377 +Train: [11] [3800/6250] eta: 0:06:00 lr: 0.000124 grad: 0.0635 (0.0663) loss: 0.8520 (0.8487) time: 0.1592 data: 0.0787 max mem: 9377 +Train: [11] [3900/6250] eta: 0:05:46 lr: 0.000124 grad: 0.0684 (0.0665) loss: 0.8450 (0.8486) time: 0.1678 data: 0.0877 max mem: 9377 +Train: [11] [4000/6250] eta: 0:05:32 lr: 0.000123 grad: 0.0611 (0.0664) loss: 0.8453 (0.8486) time: 0.1798 data: 0.1063 max mem: 9377 +Train: [11] [4100/6250] eta: 0:05:19 lr: 0.000123 grad: 0.0633 (0.0664) loss: 0.8489 (0.8485) time: 0.1804 data: 0.1015 max mem: 9377 +Train: [11] [4200/6250] eta: 0:05:04 lr: 0.000123 grad: 0.0622 (0.0664) loss: 0.8504 (0.8485) time: 0.1604 data: 0.0760 max mem: 9377 +Train: [11] [4300/6250] eta: 0:04:50 lr: 0.000123 grad: 0.0643 (0.0663) loss: 0.8488 (0.8485) time: 0.1690 data: 0.0889 max mem: 9377 +Train: [11] [4400/6250] eta: 0:04:35 lr: 0.000123 grad: 0.0628 (0.0663) loss: 0.8500 (0.8486) time: 0.1757 data: 0.0940 max mem: 9377 +Train: [11] [4500/6250] eta: 0:04:21 lr: 0.000123 grad: 0.0637 (0.0663) loss: 0.8508 (0.8486) time: 0.1869 data: 0.1012 max mem: 9377 +Train: [11] [4600/6250] eta: 0:04:07 lr: 0.000123 grad: 0.0647 (0.0662) loss: 0.8471 (0.8486) time: 0.1661 data: 0.0875 max mem: 9377 +Train: [11] [4700/6250] eta: 0:03:53 lr: 0.000123 grad: 0.0629 (0.0663) loss: 0.8515 (0.8486) time: 0.1725 data: 0.0735 max mem: 9377 +Train: [11] [4800/6250] eta: 0:03:39 lr: 0.000123 grad: 0.0678 (0.0662) loss: 0.8455 (0.8486) time: 0.1579 data: 0.0598 max mem: 9377 +Train: [11] [4900/6250] eta: 0:03:24 lr: 0.000123 grad: 0.0643 (0.0662) loss: 0.8494 (0.8486) time: 0.1151 data: 0.0171 max mem: 9377 +Train: [11] [5000/6250] eta: 0:03:08 lr: 0.000123 grad: 0.0626 (0.0662) loss: 0.8507 (0.8486) time: 0.1476 data: 0.0628 max mem: 9377 +Train: [11] [5100/6250] eta: 0:02:53 lr: 0.000123 grad: 0.0630 (0.0662) loss: 0.8490 (0.8487) time: 0.1638 data: 0.0828 max mem: 9377 +Train: [11] [5200/6250] eta: 0:02:38 lr: 0.000123 grad: 0.0597 (0.0661) loss: 0.8509 (0.8487) time: 0.1420 data: 0.0632 max mem: 9377 +Train: [11] [5300/6250] eta: 0:02:23 lr: 0.000123 grad: 0.0629 (0.0661) loss: 0.8511 (0.8487) time: 0.1542 data: 0.0632 max mem: 9377 +Train: [11] [5400/6250] eta: 0:02:08 lr: 0.000123 grad: 0.0603 (0.0660) loss: 0.8455 (0.8486) time: 0.1667 data: 0.0834 max mem: 9377 +Train: [11] [5500/6250] eta: 0:01:53 lr: 0.000123 grad: 0.0638 (0.0660) loss: 0.8503 (0.8487) time: 0.1324 data: 0.0489 max mem: 9377 +Train: [11] [5600/6250] eta: 0:01:37 lr: 0.000123 grad: 0.0634 (0.0660) loss: 0.8519 (0.8487) time: 0.1311 data: 0.0505 max mem: 9377 +Train: [11] [5700/6250] eta: 0:01:22 lr: 0.000123 grad: 0.0657 (0.0660) loss: 0.8522 (0.8487) time: 0.1285 data: 0.0424 max mem: 9377 +Train: [11] [5800/6250] eta: 0:01:07 lr: 0.000123 grad: 0.0627 (0.0659) loss: 0.8516 (0.8487) time: 0.1428 data: 0.0612 max mem: 9377 +Train: [11] [5900/6250] eta: 0:00:52 lr: 0.000123 grad: 0.0609 (0.0659) loss: 0.8504 (0.8487) time: 0.1358 data: 0.0532 max mem: 9377 +Train: [11] [6000/6250] eta: 0:00:37 lr: 0.000123 grad: 0.0622 (0.0658) loss: 0.8437 (0.8486) time: 0.1423 data: 0.0645 max mem: 9377 +Train: [11] [6100/6250] eta: 0:00:22 lr: 0.000123 grad: 0.0662 (0.0659) loss: 0.8427 (0.8486) time: 0.1394 data: 0.0595 max mem: 9377 +Train: [11] [6200/6250] eta: 0:00:07 lr: 0.000123 grad: 0.0647 (0.0659) loss: 0.8460 (0.8486) time: 0.1378 data: 0.0602 max mem: 9377 +Train: [11] [6249/6250] eta: 0:00:00 lr: 0.000123 grad: 0.0675 (0.0659) loss: 0.8509 (0.8486) time: 0.1527 data: 0.0725 max mem: 9377 +Train: [11] Total time: 0:15:41 (0.1506 s / it) +Averaged stats: lr: 0.000123 grad: 0.0675 (0.0659) loss: 0.8509 (0.8486) +Eval (hcp-train-subset): [11] [ 0/62] eta: 0:05:32 loss: 0.8490 (0.8490) time: 5.3660 data: 5.3197 max mem: 9377 +Eval (hcp-train-subset): [11] [61/62] eta: 0:00:00 loss: 0.8469 (0.8483) time: 0.1451 data: 0.1199 max mem: 9377 +Eval (hcp-train-subset): [11] Total time: 0:00:14 (0.2275 s / it) +Averaged stats (hcp-train-subset): loss: 0.8469 (0.8483) +Eval (hcp-val): [11] [ 0/62] eta: 0:05:39 loss: 0.8451 (0.8451) time: 5.4678 data: 5.4014 max mem: 9377 +Eval (hcp-val): [11] [61/62] eta: 0:00:00 loss: 0.8461 (0.8474) time: 0.1131 data: 0.0880 max mem: 9377 +Eval (hcp-val): [11] Total time: 0:00:13 (0.2133 s / it) +Averaged stats (hcp-val): loss: 0.8461 (0.8474) +Eval (nsd-val): [11] [ 0/62] eta: 0:04:13 loss: 0.8090 (0.8090) time: 4.0874 data: 4.0159 max mem: 9377 +Eval (nsd-val): [11] [61/62] eta: 0:00:00 loss: 0.8154 (0.8184) time: 0.1300 data: 0.1031 max mem: 9377 +Eval (nsd-val): [11] Total time: 0:00:13 (0.2116 s / it) +Averaged stats (nsd-val): loss: 0.8154 (0.8184) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +Train: [12] [ 0/6250] eta: 9:05:16 lr: 0.000123 grad: 0.1455 (0.1455) loss: 0.8715 (0.8715) time: 5.2346 data: 5.1283 max mem: 9377 +Train: [12] [ 100/6250] eta: 0:20:56 lr: 0.000123 grad: 0.0707 (0.0710) loss: 0.8530 (0.8550) time: 0.1406 data: 0.0575 max mem: 9377 +Train: [12] [ 200/6250] eta: 0:18:05 lr: 0.000123 grad: 0.0638 (0.0684) loss: 0.8512 (0.8542) time: 0.1522 data: 0.0659 max mem: 9377 +Train: [12] [ 300/6250] eta: 0:17:10 lr: 0.000123 grad: 0.0672 (0.0671) loss: 0.8480 (0.8527) time: 0.1548 data: 0.0719 max mem: 9377 +Train: [12] [ 400/6250] eta: 0:16:12 lr: 0.000123 grad: 0.0629 (0.0666) loss: 0.8522 (0.8520) time: 0.1402 data: 0.0534 max mem: 9377 +Train: [12] [ 500/6250] eta: 0:15:38 lr: 0.000123 grad: 0.0646 (0.0666) loss: 0.8471 (0.8510) time: 0.1726 data: 0.0863 max mem: 9377 +Train: [12] [ 600/6250] eta: 0:15:06 lr: 0.000123 grad: 0.0647 (0.0675) loss: 0.8450 (0.8503) time: 0.1367 data: 0.0508 max mem: 9377 +Train: [12] [ 700/6250] eta: 0:14:38 lr: 0.000123 grad: 0.0645 (0.0676) loss: 0.8480 (0.8496) time: 0.1470 data: 0.0645 max mem: 9377 +Train: [12] [ 800/6250] eta: 0:14:14 lr: 0.000123 grad: 0.0589 (0.0673) loss: 0.8537 (0.8495) time: 0.1533 data: 0.0722 max mem: 9377 +Train: [12] [ 900/6250] eta: 0:13:51 lr: 0.000123 grad: 0.0626 (0.0672) loss: 0.8478 (0.8492) time: 0.1330 data: 0.0435 max mem: 9377 +Train: [12] [1000/6250] eta: 0:13:28 lr: 0.000123 grad: 0.0683 (0.0671) loss: 0.8423 (0.8488) time: 0.1553 data: 0.0776 max mem: 9377 +Train: [12] [1100/6250] eta: 0:13:05 lr: 0.000123 grad: 0.0633 (0.0670) loss: 0.8452 (0.8483) time: 0.1445 data: 0.0668 max mem: 9377 +Train: [12] [1200/6250] eta: 0:12:45 lr: 0.000123 grad: 0.0664 (0.0671) loss: 0.8456 (0.8480) time: 0.1344 data: 0.0455 max mem: 9377 +Train: [12] [1300/6250] eta: 0:12:27 lr: 0.000123 grad: 0.0655 (0.0671) loss: 0.8462 (0.8478) time: 0.1418 data: 0.0598 max mem: 9377 +Train: [12] [1400/6250] eta: 0:12:11 lr: 0.000123 grad: 0.0670 (0.0674) loss: 0.8458 (0.8476) time: 0.1451 data: 0.0654 max mem: 9377 +Train: [12] [1500/6250] eta: 0:11:53 lr: 0.000123 grad: 0.0637 (0.0673) loss: 0.8485 (0.8476) time: 0.1580 data: 0.0802 max mem: 9377 +Train: [12] [1600/6250] eta: 0:11:35 lr: 0.000123 grad: 0.0690 (0.0676) loss: 0.8394 (0.8475) time: 0.1167 data: 0.0354 max mem: 9377 +Train: [12] [1700/6250] eta: 0:11:20 lr: 0.000123 grad: 0.0613 (0.0675) loss: 0.8488 (0.8475) time: 0.1611 data: 0.0807 max mem: 9377 +Train: [12] [1800/6250] eta: 0:11:04 lr: 0.000123 grad: 0.0674 (0.0675) loss: 0.8454 (0.8474) time: 0.1504 data: 0.0738 max mem: 9377 +Train: [12] [1900/6250] eta: 0:10:48 lr: 0.000123 grad: 0.0619 (0.0674) loss: 0.8515 (0.8474) time: 0.1355 data: 0.0547 max mem: 9377 +Train: [12] [2000/6250] eta: 0:10:31 lr: 0.000123 grad: 0.0619 (0.0673) loss: 0.8428 (0.8473) time: 0.1339 data: 0.0476 max mem: 9377 +Train: [12] [2100/6250] eta: 0:10:16 lr: 0.000123 grad: 0.0654 (0.0672) loss: 0.8447 (0.8472) time: 0.1560 data: 0.0688 max mem: 9377 +Train: [12] [2200/6250] eta: 0:09:59 lr: 0.000123 grad: 0.0637 (0.0670) loss: 0.8468 (0.8472) time: 0.1371 data: 0.0494 max mem: 9377 +Train: [12] [2300/6250] eta: 0:09:43 lr: 0.000123 grad: 0.0682 (0.0671) loss: 0.8416 (0.8471) time: 0.1520 data: 0.0771 max mem: 9377 +Train: [12] [2400/6250] eta: 0:09:28 lr: 0.000123 grad: 0.0629 (0.0671) loss: 0.8453 (0.8471) time: 0.1298 data: 0.0499 max mem: 9377 +Train: [12] [2500/6250] eta: 0:09:13 lr: 0.000123 grad: 0.0639 (0.0671) loss: 0.8452 (0.8470) time: 0.1532 data: 0.0760 max mem: 9377 +Train: [12] [2600/6250] eta: 0:08:57 lr: 0.000123 grad: 0.0655 (0.0670) loss: 0.8453 (0.8471) time: 0.1504 data: 0.0709 max mem: 9377 +Train: [12] [2700/6250] eta: 0:08:42 lr: 0.000123 grad: 0.0619 (0.0670) loss: 0.8493 (0.8471) time: 0.1308 data: 0.0500 max mem: 9377 +Train: [12] [2800/6250] eta: 0:08:27 lr: 0.000123 grad: 0.0621 (0.0668) loss: 0.8386 (0.8470) time: 0.1495 data: 0.0697 max mem: 9377 +Train: [12] [2900/6250] eta: 0:08:11 lr: 0.000123 grad: 0.0623 (0.0668) loss: 0.8475 (0.8470) time: 0.1408 data: 0.0587 max mem: 9377 +Train: [12] [3000/6250] eta: 0:07:56 lr: 0.000123 grad: 0.0613 (0.0668) loss: 0.8464 (0.8469) time: 0.1343 data: 0.0536 max mem: 9377 +Train: [12] [3100/6250] eta: 0:07:41 lr: 0.000123 grad: 0.0607 (0.0667) loss: 0.8430 (0.8469) time: 0.1623 data: 0.0865 max mem: 9377 +Train: [12] [3200/6250] eta: 0:07:25 lr: 0.000123 grad: 0.0629 (0.0666) loss: 0.8463 (0.8469) time: 0.1526 data: 0.0777 max mem: 9377 +Train: [12] [3300/6250] eta: 0:07:11 lr: 0.000123 grad: 0.0581 (0.0665) loss: 0.8465 (0.8468) time: 0.1483 data: 0.0647 max mem: 9377 +Train: [12] [3400/6250] eta: 0:06:57 lr: 0.000123 grad: 0.0721 (0.0665) loss: 0.8411 (0.8468) time: 0.1613 data: 0.0839 max mem: 9377 +Train: [12] [3500/6250] eta: 0:06:42 lr: 0.000123 grad: 0.0588 (0.0664) loss: 0.8454 (0.8467) time: 0.1478 data: 0.0626 max mem: 9377 +Train: [12] [3600/6250] eta: 0:06:27 lr: 0.000123 grad: 0.0627 (0.0664) loss: 0.8427 (0.8466) time: 0.1611 data: 0.0784 max mem: 9377 +Train: [12] [3700/6250] eta: 0:06:13 lr: 0.000123 grad: 0.0660 (0.0664) loss: 0.8457 (0.8465) time: 0.1594 data: 0.0766 max mem: 9377 +Train: [12] [3800/6250] eta: 0:05:59 lr: 0.000123 grad: 0.0608 (0.0664) loss: 0.8438 (0.8464) time: 0.1956 data: 0.1218 max mem: 9377 +Train: [12] [3900/6250] eta: 0:05:45 lr: 0.000123 grad: 0.0618 (0.0664) loss: 0.8467 (0.8463) time: 0.1651 data: 0.0780 max mem: 9377 +Train: [12] [4000/6250] eta: 0:05:30 lr: 0.000123 grad: 0.0616 (0.0663) loss: 0.8312 (0.8462) time: 0.1357 data: 0.0598 max mem: 9377 +Train: [12] [4100/6250] eta: 0:05:16 lr: 0.000123 grad: 0.0625 (0.0663) loss: 0.8416 (0.8461) time: 0.1441 data: 0.0726 max mem: 9377 +Train: [12] [4200/6250] eta: 0:05:02 lr: 0.000123 grad: 0.0641 (0.0663) loss: 0.8451 (0.8460) time: 0.1550 data: 0.0782 max mem: 9377 +Train: [12] [4300/6250] eta: 0:04:47 lr: 0.000123 grad: 0.0667 (0.0662) loss: 0.8441 (0.8460) time: 0.1487 data: 0.0689 max mem: 9377 +Train: [12] [4400/6250] eta: 0:04:32 lr: 0.000123 grad: 0.0578 (0.0661) loss: 0.8410 (0.8460) time: 0.1329 data: 0.0569 max mem: 9377 +Train: [12] [4500/6250] eta: 0:04:17 lr: 0.000123 grad: 0.0602 (0.0661) loss: 0.8492 (0.8460) time: 0.1386 data: 0.0573 max mem: 9377 +Train: [12] [4600/6250] eta: 0:04:02 lr: 0.000123 grad: 0.0617 (0.0660) loss: 0.8427 (0.8460) time: 0.1343 data: 0.0532 max mem: 9377 +Train: [12] [4700/6250] eta: 0:03:47 lr: 0.000123 grad: 0.0622 (0.0659) loss: 0.8419 (0.8460) time: 0.1334 data: 0.0439 max mem: 9377 +Train: [12] [4800/6250] eta: 0:03:32 lr: 0.000123 grad: 0.0628 (0.0659) loss: 0.8468 (0.8460) time: 0.1639 data: 0.0829 max mem: 9377 +Train: [12] [4900/6250] eta: 0:03:17 lr: 0.000123 grad: 0.0645 (0.0659) loss: 0.8459 (0.8460) time: 0.1408 data: 0.0474 max mem: 9377 +Train: [12] [5000/6250] eta: 0:03:02 lr: 0.000123 grad: 0.0590 (0.0659) loss: 0.8457 (0.8460) time: 0.1108 data: 0.0215 max mem: 9377 +Train: [12] [5100/6250] eta: 0:02:48 lr: 0.000123 grad: 0.0605 (0.0658) loss: 0.8491 (0.8460) time: 0.1414 data: 0.0584 max mem: 9377 +Train: [12] [5200/6250] eta: 0:02:33 lr: 0.000123 grad: 0.0645 (0.0658) loss: 0.8441 (0.8460) time: 0.1512 data: 0.0705 max mem: 9377 +Train: [12] [5300/6250] eta: 0:02:18 lr: 0.000123 grad: 0.0615 (0.0658) loss: 0.8434 (0.8460) time: 0.1551 data: 0.0826 max mem: 9377 +Train: [12] [5400/6250] eta: 0:02:04 lr: 0.000123 grad: 0.0621 (0.0658) loss: 0.8432 (0.8460) time: 0.1496 data: 0.0687 max mem: 9377 +Train: [12] [5500/6250] eta: 0:01:49 lr: 0.000123 grad: 0.0607 (0.0658) loss: 0.8451 (0.8460) time: 0.1182 data: 0.0298 max mem: 9377 +Train: [12] [5600/6250] eta: 0:01:34 lr: 0.000123 grad: 0.0619 (0.0657) loss: 0.8431 (0.8460) time: 0.1570 data: 0.0773 max mem: 9377 +Train: [12] [5700/6250] eta: 0:01:20 lr: 0.000123 grad: 0.0607 (0.0657) loss: 0.8446 (0.8460) time: 0.1373 data: 0.0535 max mem: 9377 +Train: [12] [5800/6250] eta: 0:01:05 lr: 0.000123 grad: 0.0615 (0.0657) loss: 0.8436 (0.8459) time: 0.1431 data: 0.0584 max mem: 9377 +Train: [12] [5900/6250] eta: 0:00:50 lr: 0.000123 grad: 0.0616 (0.0657) loss: 0.8445 (0.8459) time: 0.1341 data: 0.0519 max mem: 9377 +Train: [12] [6000/6250] eta: 0:00:36 lr: 0.000123 grad: 0.0629 (0.0656) loss: 0.8423 (0.8459) time: 0.1482 data: 0.0703 max mem: 9377 +Train: [12] [6100/6250] eta: 0:00:21 lr: 0.000123 grad: 0.0653 (0.0656) loss: 0.8436 (0.8459) time: 0.1482 data: 0.0701 max mem: 9377 +Train: [12] [6200/6250] eta: 0:00:07 lr: 0.000123 grad: 0.0608 (0.0656) loss: 0.8476 (0.8459) time: 0.1415 data: 0.0528 max mem: 9377 +Train: [12] [6249/6250] eta: 0:00:00 lr: 0.000123 grad: 0.0608 (0.0656) loss: 0.8496 (0.8459) time: 0.1284 data: 0.0484 max mem: 9377 +Train: [12] Total time: 0:15:14 (0.1464 s / it) +Averaged stats: lr: 0.000123 grad: 0.0608 (0.0656) loss: 0.8496 (0.8459) +Eval (hcp-train-subset): [12] [ 0/62] eta: 0:06:05 loss: 0.8473 (0.8473) time: 5.8970 data: 5.8636 max mem: 9377 +Eval (hcp-train-subset): [12] [61/62] eta: 0:00:00 loss: 0.8487 (0.8476) time: 0.1051 data: 0.0797 max mem: 9377 +Eval (hcp-train-subset): [12] Total time: 0:00:14 (0.2285 s / it) +Averaged stats (hcp-train-subset): loss: 0.8487 (0.8476) +Eval (hcp-val): [12] [ 0/62] eta: 0:05:26 loss: 0.8468 (0.8468) time: 5.2631 data: 5.2325 max mem: 9377 +Eval (hcp-val): [12] [61/62] eta: 0:00:00 loss: 0.8458 (0.8468) time: 0.1530 data: 0.1274 max mem: 9377 +Eval (hcp-val): [12] Total time: 0:00:13 (0.2219 s / it) +Averaged stats (hcp-val): loss: 0.8458 (0.8468) +Eval (nsd-val): [12] [ 0/62] eta: 0:04:37 loss: 0.8100 (0.8100) time: 4.4695 data: 4.4337 max mem: 9377 +Eval (nsd-val): [12] [61/62] eta: 0:00:00 loss: 0.8152 (0.8177) time: 0.1403 data: 0.1123 max mem: 9377 +Eval (nsd-val): [12] Total time: 0:00:13 (0.2184 s / it) +Averaged stats (nsd-val): loss: 0.8152 (0.8177) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +Train: [13] [ 0/6250] eta: 10:00:41 lr: 0.000123 grad: 0.1161 (0.1161) loss: 0.8464 (0.8464) time: 5.7666 data: 5.6337 max mem: 9377 +Train: [13] [ 100/6250] eta: 0:21:19 lr: 0.000123 grad: 0.0728 (0.0772) loss: 0.8406 (0.8449) time: 0.1920 data: 0.1072 max mem: 9377 +Train: [13] [ 200/6250] eta: 0:18:14 lr: 0.000123 grad: 0.0685 (0.0747) loss: 0.8371 (0.8442) time: 0.1643 data: 0.0703 max mem: 9377 +Train: [13] [ 300/6250] eta: 0:16:50 lr: 0.000123 grad: 0.0628 (0.0713) loss: 0.8469 (0.8451) time: 0.1528 data: 0.0716 max mem: 9377 +Train: [13] [ 400/6250] eta: 0:16:10 lr: 0.000123 grad: 0.0615 (0.0692) loss: 0.8437 (0.8456) time: 0.1496 data: 0.0592 max mem: 9377 +Train: [13] [ 500/6250] eta: 0:15:37 lr: 0.000123 grad: 0.0644 (0.0679) loss: 0.8487 (0.8458) time: 0.1566 data: 0.0704 max mem: 9377 +Train: [13] [ 600/6250] eta: 0:15:09 lr: 0.000123 grad: 0.0585 (0.0671) loss: 0.8521 (0.8463) time: 0.1432 data: 0.0577 max mem: 9377 +Train: [13] [ 700/6250] eta: 0:14:40 lr: 0.000123 grad: 0.0609 (0.0664) loss: 0.8492 (0.8465) time: 0.1323 data: 0.0446 max mem: 9377 +Train: [13] [ 800/6250] eta: 0:14:17 lr: 0.000123 grad: 0.0604 (0.0659) loss: 0.8513 (0.8469) time: 0.1568 data: 0.0693 max mem: 9377 +Train: [13] [ 900/6250] eta: 0:13:57 lr: 0.000123 grad: 0.0624 (0.0656) loss: 0.8492 (0.8472) time: 0.1574 data: 0.0745 max mem: 9377 +Train: [13] [1000/6250] eta: 0:13:33 lr: 0.000123 grad: 0.0594 (0.0654) loss: 0.8473 (0.8474) time: 0.1424 data: 0.0563 max mem: 9377 +Train: [13] [1100/6250] eta: 0:13:12 lr: 0.000123 grad: 0.0605 (0.0650) loss: 0.8469 (0.8475) time: 0.1281 data: 0.0428 max mem: 9377 +Train: [13] [1200/6250] eta: 0:12:51 lr: 0.000123 grad: 0.0581 (0.0649) loss: 0.8495 (0.8476) time: 0.1356 data: 0.0545 max mem: 9377 +Train: [13] [1300/6250] eta: 0:12:32 lr: 0.000123 grad: 0.0623 (0.0650) loss: 0.8468 (0.8476) time: 0.1468 data: 0.0631 max mem: 9377 +Train: [13] [1400/6250] eta: 0:12:15 lr: 0.000123 grad: 0.0630 (0.0649) loss: 0.8462 (0.8476) time: 0.1464 data: 0.0636 max mem: 9377 +Train: [13] [1500/6250] eta: 0:11:58 lr: 0.000123 grad: 0.0647 (0.0650) loss: 0.8450 (0.8475) time: 0.1487 data: 0.0656 max mem: 9377 +Train: [13] [1600/6250] eta: 0:11:40 lr: 0.000123 grad: 0.0613 (0.0650) loss: 0.8492 (0.8475) time: 0.1568 data: 0.0733 max mem: 9377 +Train: [13] [1700/6250] eta: 0:11:23 lr: 0.000123 grad: 0.0669 (0.0650) loss: 0.8446 (0.8473) time: 0.1272 data: 0.0420 max mem: 9377 +Train: [13] [1800/6250] eta: 0:11:07 lr: 0.000123 grad: 0.0626 (0.0649) loss: 0.8462 (0.8473) time: 0.1389 data: 0.0649 max mem: 9377 +Train: [13] [1900/6250] eta: 0:10:51 lr: 0.000123 grad: 0.0623 (0.0649) loss: 0.8436 (0.8472) time: 0.1330 data: 0.0502 max mem: 9377 +Train: [13] [2000/6250] eta: 0:10:35 lr: 0.000123 grad: 0.0580 (0.0649) loss: 0.8462 (0.8471) time: 0.1287 data: 0.0423 max mem: 9377 +Train: [13] [2100/6250] eta: 0:10:19 lr: 0.000123 grad: 0.0603 (0.0648) loss: 0.8511 (0.8471) time: 0.1650 data: 0.0803 max mem: 9377 +Train: [13] [2200/6250] eta: 0:10:03 lr: 0.000123 grad: 0.0631 (0.0648) loss: 0.8466 (0.8470) time: 0.1579 data: 0.0781 max mem: 9377 +Train: [13] [2300/6250] eta: 0:09:47 lr: 0.000123 grad: 0.0614 (0.0648) loss: 0.8518 (0.8470) time: 0.1398 data: 0.0537 max mem: 9377 +Train: [13] [2400/6250] eta: 0:09:32 lr: 0.000123 grad: 0.0639 (0.0648) loss: 0.8491 (0.8470) time: 0.1379 data: 0.0623 max mem: 9377 +Train: [13] [2500/6250] eta: 0:09:17 lr: 0.000123 grad: 0.0601 (0.0647) loss: 0.8499 (0.8470) time: 0.1361 data: 0.0549 max mem: 9377 +Train: [13] [2600/6250] eta: 0:09:01 lr: 0.000123 grad: 0.0617 (0.0646) loss: 0.8514 (0.8470) time: 0.1373 data: 0.0558 max mem: 9377 +Train: [13] [2700/6250] eta: 0:08:46 lr: 0.000123 grad: 0.0601 (0.0646) loss: 0.8474 (0.8471) time: 0.1580 data: 0.0800 max mem: 9377 +Train: [13] [2800/6250] eta: 0:08:31 lr: 0.000123 grad: 0.0637 (0.0645) loss: 0.8468 (0.8470) time: 0.1682 data: 0.0882 max mem: 9377 +Train: [13] [2900/6250] eta: 0:08:16 lr: 0.000123 grad: 0.0619 (0.0645) loss: 0.8445 (0.8470) time: 0.1491 data: 0.0699 max mem: 9377 +Train: [13] [3000/6250] eta: 0:08:00 lr: 0.000123 grad: 0.0579 (0.0643) loss: 0.8433 (0.8470) time: 0.1257 data: 0.0443 max mem: 9377 +Train: [13] [3100/6250] eta: 0:07:45 lr: 0.000123 grad: 0.0589 (0.0643) loss: 0.8385 (0.8469) time: 0.1445 data: 0.0680 max mem: 9377 +Train: [13] [3200/6250] eta: 0:07:30 lr: 0.000123 grad: 0.0617 (0.0642) loss: 0.8420 (0.8468) time: 0.1273 data: 0.0426 max mem: 9377 +Train: [13] [3300/6250] eta: 0:07:15 lr: 0.000123 grad: 0.0565 (0.0641) loss: 0.8485 (0.8467) time: 0.1562 data: 0.0765 max mem: 9377 +Train: [13] [3400/6250] eta: 0:07:00 lr: 0.000123 grad: 0.0616 (0.0641) loss: 0.8393 (0.8466) time: 0.1468 data: 0.0684 max mem: 9377 +Train: [13] [3500/6250] eta: 0:06:46 lr: 0.000123 grad: 0.0628 (0.0641) loss: 0.8441 (0.8465) time: 0.1539 data: 0.0727 max mem: 9377 +Train: [13] [3600/6250] eta: 0:06:33 lr: 0.000123 grad: 0.0599 (0.0641) loss: 0.8443 (0.8465) time: 0.1566 data: 0.0707 max mem: 9377 +Train: [13] [3700/6250] eta: 0:06:18 lr: 0.000122 grad: 0.0618 (0.0641) loss: 0.8451 (0.8464) time: 0.1591 data: 0.0732 max mem: 9377 +Train: [13] [3800/6250] eta: 0:06:04 lr: 0.000122 grad: 0.0650 (0.0642) loss: 0.8371 (0.8462) time: 0.1672 data: 0.0807 max mem: 9377 +Train: [13] [3900/6250] eta: 0:05:49 lr: 0.000122 grad: 0.0622 (0.0642) loss: 0.8434 (0.8461) time: 0.2172 data: 0.1455 max mem: 9377 +Train: [13] [4000/6250] eta: 0:05:34 lr: 0.000122 grad: 0.0657 (0.0642) loss: 0.8356 (0.8460) time: 0.1368 data: 0.0527 max mem: 9377 +Train: [13] [4100/6250] eta: 0:05:19 lr: 0.000122 grad: 0.0596 (0.0642) loss: 0.8426 (0.8459) time: 0.1359 data: 0.0496 max mem: 9377 +Train: [13] [4200/6250] eta: 0:05:04 lr: 0.000122 grad: 0.0630 (0.0642) loss: 0.8385 (0.8459) time: 0.1526 data: 0.0819 max mem: 9377 +Train: [13] [4300/6250] eta: 0:04:49 lr: 0.000122 grad: 0.0617 (0.0642) loss: 0.8396 (0.8458) time: 0.1278 data: 0.0437 max mem: 9377 +Train: [13] [4400/6250] eta: 0:04:34 lr: 0.000122 grad: 0.0663 (0.0642) loss: 0.8374 (0.8457) time: 0.1580 data: 0.0716 max mem: 9377 +Train: [13] [4500/6250] eta: 0:04:19 lr: 0.000122 grad: 0.0644 (0.0642) loss: 0.8391 (0.8456) time: 0.1482 data: 0.0723 max mem: 9377 +Train: [13] [4600/6250] eta: 0:04:03 lr: 0.000122 grad: 0.0663 (0.0643) loss: 0.8387 (0.8455) time: 0.1379 data: 0.0497 max mem: 9377 +Train: [13] [4700/6250] eta: 0:03:48 lr: 0.000122 grad: 0.0633 (0.0643) loss: 0.8418 (0.8454) time: 0.1041 data: 0.0088 max mem: 9377 +Train: [13] [4800/6250] eta: 0:03:33 lr: 0.000122 grad: 0.0643 (0.0643) loss: 0.8470 (0.8454) time: 0.1177 data: 0.0290 max mem: 9377 +Train: [13] [4900/6250] eta: 0:03:18 lr: 0.000122 grad: 0.0648 (0.0644) loss: 0.8418 (0.8453) time: 0.1173 data: 0.0286 max mem: 9377 +Train: [13] [5000/6250] eta: 0:03:03 lr: 0.000122 grad: 0.0631 (0.0644) loss: 0.8414 (0.8452) time: 0.1460 data: 0.0589 max mem: 9377 +Train: [13] [5100/6250] eta: 0:02:48 lr: 0.000122 grad: 0.0653 (0.0644) loss: 0.8387 (0.8451) time: 0.1659 data: 0.0875 max mem: 9377 +Train: [13] [5200/6250] eta: 0:02:34 lr: 0.000122 grad: 0.0628 (0.0644) loss: 0.8458 (0.8450) time: 0.1133 data: 0.0306 max mem: 9377 +Train: [13] [5300/6250] eta: 0:02:19 lr: 0.000122 grad: 0.0633 (0.0644) loss: 0.8427 (0.8450) time: 0.1389 data: 0.0516 max mem: 9377 +Train: [13] [5400/6250] eta: 0:02:04 lr: 0.000122 grad: 0.0622 (0.0644) loss: 0.8459 (0.8450) time: 0.1205 data: 0.0344 max mem: 9377 +Train: [13] [5500/6250] eta: 0:01:50 lr: 0.000122 grad: 0.0618 (0.0644) loss: 0.8454 (0.8450) time: 0.1164 data: 0.0317 max mem: 9377 +Train: [13] [5600/6250] eta: 0:01:35 lr: 0.000122 grad: 0.0611 (0.0644) loss: 0.8444 (0.8449) time: 0.1536 data: 0.0716 max mem: 9377 +Train: [13] [5700/6250] eta: 0:01:20 lr: 0.000122 grad: 0.0640 (0.0645) loss: 0.8448 (0.8449) time: 0.1310 data: 0.0515 max mem: 9377 +Train: [13] [5800/6250] eta: 0:01:05 lr: 0.000122 grad: 0.0612 (0.0645) loss: 0.8472 (0.8449) time: 0.1478 data: 0.0720 max mem: 9377 +Train: [13] [5900/6250] eta: 0:00:51 lr: 0.000122 grad: 0.0591 (0.0645) loss: 0.8426 (0.8448) time: 0.1416 data: 0.0619 max mem: 9377 +Train: [13] [6000/6250] eta: 0:00:36 lr: 0.000122 grad: 0.0652 (0.0645) loss: 0.8424 (0.8448) time: 0.1395 data: 0.0574 max mem: 9377 +Train: [13] [6100/6250] eta: 0:00:21 lr: 0.000122 grad: 0.0670 (0.0645) loss: 0.8412 (0.8447) time: 0.1520 data: 0.0764 max mem: 9377 +Train: [13] [6200/6250] eta: 0:00:07 lr: 0.000122 grad: 0.0619 (0.0645) loss: 0.8434 (0.8446) time: 0.1614 data: 0.0783 max mem: 9377 +Train: [13] [6249/6250] eta: 0:00:00 lr: 0.000122 grad: 0.0626 (0.0646) loss: 0.8355 (0.8445) time: 0.1702 data: 0.0884 max mem: 9377 +Train: [13] Total time: 0:15:22 (0.1476 s / it) +Averaged stats: lr: 0.000122 grad: 0.0626 (0.0646) loss: 0.8355 (0.8445) +Eval (hcp-train-subset): [13] [ 0/62] eta: 0:04:52 loss: 0.8446 (0.8446) time: 4.7213 data: 4.6896 max mem: 9377 +Eval (hcp-train-subset): [13] [61/62] eta: 0:00:00 loss: 0.8456 (0.8458) time: 0.1441 data: 0.1188 max mem: 9377 +Eval (hcp-train-subset): [13] Total time: 0:00:14 (0.2268 s / it) +Averaged stats (hcp-train-subset): loss: 0.8456 (0.8458) +Eval (hcp-val): [13] [ 0/62] eta: 0:05:37 loss: 0.8424 (0.8424) time: 5.4486 data: 5.4188 max mem: 9377 +Eval (hcp-val): [13] [61/62] eta: 0:00:00 loss: 0.8428 (0.8449) time: 0.1477 data: 0.1225 max mem: 9377 +Eval (hcp-val): [13] Total time: 0:00:13 (0.2204 s / it) +Averaged stats (hcp-val): loss: 0.8428 (0.8449) +Eval (nsd-val): [13] [ 0/62] eta: 0:02:59 loss: 0.8070 (0.8070) time: 2.8936 data: 2.8028 max mem: 9377 +Eval (nsd-val): [13] [61/62] eta: 0:00:00 loss: 0.8163 (0.8158) time: 0.0974 data: 0.0694 max mem: 9377 +Eval (nsd-val): [13] Total time: 0:00:13 (0.2129 s / it) +Averaged stats (nsd-val): loss: 0.8163 (0.8158) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +Train: [14] [ 0/6250] eta: 8:05:52 lr: 0.000122 grad: 0.0585 (0.0585) loss: 0.8867 (0.8867) time: 4.6644 data: 4.4463 max mem: 9377 +Train: [14] [ 100/6250] eta: 0:20:11 lr: 0.000122 grad: 0.0644 (0.0708) loss: 0.8434 (0.8528) time: 0.1466 data: 0.0515 max mem: 9377 +Train: [14] [ 200/6250] eta: 0:17:40 lr: 0.000122 grad: 0.0714 (0.0706) loss: 0.8383 (0.8448) time: 0.1397 data: 0.0556 max mem: 9377 +Train: [14] [ 300/6250] eta: 0:16:26 lr: 0.000122 grad: 0.0578 (0.0688) loss: 0.8397 (0.8424) time: 0.1470 data: 0.0538 max mem: 9377 +Train: [14] [ 400/6250] eta: 0:15:35 lr: 0.000122 grad: 0.0619 (0.0688) loss: 0.8402 (0.8416) time: 0.1420 data: 0.0491 max mem: 9377 +Train: [14] [ 500/6250] eta: 0:15:12 lr: 0.000122 grad: 0.0632 (0.0680) loss: 0.8387 (0.8413) time: 0.1565 data: 0.0735 max mem: 9377 +Train: [14] [ 600/6250] eta: 0:14:45 lr: 0.000122 grad: 0.0604 (0.0670) loss: 0.8400 (0.8413) time: 0.1554 data: 0.0751 max mem: 9377 +Train: [14] [ 700/6250] eta: 0:14:17 lr: 0.000122 grad: 0.0582 (0.0664) loss: 0.8448 (0.8412) time: 0.1337 data: 0.0476 max mem: 9377 +Train: [14] [ 800/6250] eta: 0:13:57 lr: 0.000122 grad: 0.0608 (0.0661) loss: 0.8405 (0.8411) time: 0.1548 data: 0.0690 max mem: 9377 +Train: [14] [ 900/6250] eta: 0:13:36 lr: 0.000122 grad: 0.0618 (0.0657) loss: 0.8476 (0.8411) time: 0.1464 data: 0.0671 max mem: 9377 +Train: [14] [1000/6250] eta: 0:13:18 lr: 0.000122 grad: 0.0624 (0.0652) loss: 0.8460 (0.8415) time: 0.1614 data: 0.0763 max mem: 9377 +Train: [14] [1100/6250] eta: 0:12:55 lr: 0.000122 grad: 0.0611 (0.0651) loss: 0.8483 (0.8418) time: 0.1355 data: 0.0518 max mem: 9377 +Train: [14] [1200/6250] eta: 0:12:34 lr: 0.000122 grad: 0.0623 (0.0650) loss: 0.8484 (0.8420) time: 0.1479 data: 0.0624 max mem: 9377 +Train: [14] [1300/6250] eta: 0:12:16 lr: 0.000122 grad: 0.0604 (0.0649) loss: 0.8421 (0.8421) time: 0.1335 data: 0.0509 max mem: 9377 +Train: [14] [1400/6250] eta: 0:11:58 lr: 0.000122 grad: 0.0705 (0.0651) loss: 0.8435 (0.8422) time: 0.1220 data: 0.0316 max mem: 9377 +Train: [14] [1500/6250] eta: 0:11:40 lr: 0.000122 grad: 0.0634 (0.0654) loss: 0.8368 (0.8422) time: 0.1205 data: 0.0405 max mem: 9377 +Train: [14] [1600/6250] eta: 0:11:23 lr: 0.000122 grad: 0.0608 (0.0654) loss: 0.8440 (0.8423) time: 0.1267 data: 0.0379 max mem: 9377 +Train: [14] [1700/6250] eta: 0:11:05 lr: 0.000122 grad: 0.0613 (0.0654) loss: 0.8423 (0.8424) time: 0.1361 data: 0.0494 max mem: 9377 +Train: [14] [1800/6250] eta: 0:10:49 lr: 0.000122 grad: 0.0645 (0.0654) loss: 0.8400 (0.8424) time: 0.1296 data: 0.0426 max mem: 9377 +Train: [14] [1900/6250] eta: 0:10:32 lr: 0.000122 grad: 0.0633 (0.0653) loss: 0.8461 (0.8425) time: 0.1650 data: 0.0852 max mem: 9377 +Train: [14] [2000/6250] eta: 0:10:17 lr: 0.000122 grad: 0.0643 (0.0654) loss: 0.8435 (0.8425) time: 0.1433 data: 0.0572 max mem: 9377 +Train: [14] [2100/6250] eta: 0:10:01 lr: 0.000122 grad: 0.0788 (0.0656) loss: 0.8412 (0.8425) time: 0.1484 data: 0.0733 max mem: 9377 +Train: [14] [2200/6250] eta: 0:09:46 lr: 0.000122 grad: 0.0609 (0.0656) loss: 0.8457 (0.8425) time: 0.1281 data: 0.0397 max mem: 9377 +Train: [14] [2300/6250] eta: 0:09:31 lr: 0.000122 grad: 0.0630 (0.0655) loss: 0.8450 (0.8424) time: 0.1451 data: 0.0622 max mem: 9377 +Train: [14] [2400/6250] eta: 0:09:16 lr: 0.000122 grad: 0.0630 (0.0655) loss: 0.8486 (0.8424) time: 0.1287 data: 0.0494 max mem: 9377 +Train: [14] [2500/6250] eta: 0:09:01 lr: 0.000122 grad: 0.0626 (0.0656) loss: 0.8434 (0.8424) time: 0.1307 data: 0.0476 max mem: 9377 +Train: [14] [2600/6250] eta: 0:08:46 lr: 0.000122 grad: 0.0631 (0.0656) loss: 0.8444 (0.8424) time: 0.1357 data: 0.0499 max mem: 9377 +Train: [14] [2700/6250] eta: 0:08:31 lr: 0.000122 grad: 0.0618 (0.0656) loss: 0.8452 (0.8425) time: 0.1273 data: 0.0429 max mem: 9377 +Train: [14] [2800/6250] eta: 0:08:15 lr: 0.000122 grad: 0.0612 (0.0657) loss: 0.8451 (0.8424) time: 0.1436 data: 0.0583 max mem: 9377 +Train: [14] [2900/6250] eta: 0:08:01 lr: 0.000122 grad: 0.0617 (0.0657) loss: 0.8429 (0.8425) time: 0.1471 data: 0.0622 max mem: 9377 +Train: [14] [3000/6250] eta: 0:07:45 lr: 0.000122 grad: 0.0642 (0.0656) loss: 0.8405 (0.8424) time: 0.1453 data: 0.0548 max mem: 9377 +Train: [14] [3100/6250] eta: 0:07:30 lr: 0.000122 grad: 0.0608 (0.0655) loss: 0.8418 (0.8424) time: 0.1308 data: 0.0494 max mem: 9377 +Train: [14] [3200/6250] eta: 0:07:15 lr: 0.000122 grad: 0.0632 (0.0655) loss: 0.8398 (0.8424) time: 0.1206 data: 0.0348 max mem: 9377 +Train: [14] [3300/6250] eta: 0:07:01 lr: 0.000122 grad: 0.0619 (0.0655) loss: 0.8399 (0.8423) time: 0.1514 data: 0.0716 max mem: 9377 +Train: [14] [3400/6250] eta: 0:06:51 lr: 0.000122 grad: 0.0629 (0.0655) loss: 0.8387 (0.8422) time: 0.1696 data: 0.0859 max mem: 9377 +Train: [14] [3500/6250] eta: 0:06:37 lr: 0.000122 grad: 0.0623 (0.0654) loss: 0.8372 (0.8421) time: 0.1905 data: 0.1055 max mem: 9377 +Train: [14] [3600/6250] eta: 0:06:23 lr: 0.000122 grad: 0.0644 (0.0654) loss: 0.8407 (0.8421) time: 0.1516 data: 0.0640 max mem: 9377 +Train: [14] [3700/6250] eta: 0:06:08 lr: 0.000122 grad: 0.0611 (0.0654) loss: 0.8419 (0.8420) time: 0.1341 data: 0.0506 max mem: 9377 +Train: [14] [3800/6250] eta: 0:05:56 lr: 0.000122 grad: 0.0666 (0.0655) loss: 0.8334 (0.8419) time: 0.1575 data: 0.0739 max mem: 9377 +Train: [14] [3900/6250] eta: 0:05:42 lr: 0.000122 grad: 0.0646 (0.0655) loss: 0.8416 (0.8419) time: 0.1523 data: 0.0669 max mem: 9377 +Train: [14] [4000/6250] eta: 0:05:27 lr: 0.000122 grad: 0.0631 (0.0655) loss: 0.8407 (0.8418) time: 0.1462 data: 0.0690 max mem: 9377 +Train: [14] [4100/6250] eta: 0:05:12 lr: 0.000122 grad: 0.0596 (0.0655) loss: 0.8443 (0.8418) time: 0.1108 data: 0.0316 max mem: 9377 +Train: [14] [4200/6250] eta: 0:04:57 lr: 0.000122 grad: 0.0640 (0.0655) loss: 0.8388 (0.8418) time: 0.1113 data: 0.0317 max mem: 9377 +Train: [14] [4300/6250] eta: 0:04:42 lr: 0.000122 grad: 0.0660 (0.0655) loss: 0.8396 (0.8418) time: 0.1641 data: 0.0809 max mem: 9377 +Train: [14] [4400/6250] eta: 0:04:28 lr: 0.000122 grad: 0.0666 (0.0656) loss: 0.8375 (0.8417) time: 0.1278 data: 0.0439 max mem: 9377 +Train: [14] [4500/6250] eta: 0:04:13 lr: 0.000122 grad: 0.0618 (0.0656) loss: 0.8384 (0.8417) time: 0.1764 data: 0.0933 max mem: 9377 +Train: [14] [4600/6250] eta: 0:03:58 lr: 0.000122 grad: 0.0635 (0.0656) loss: 0.8421 (0.8417) time: 0.1484 data: 0.0620 max mem: 9377 +Train: [14] [4700/6250] eta: 0:03:43 lr: 0.000122 grad: 0.0612 (0.0655) loss: 0.8381 (0.8417) time: 0.1490 data: 0.0613 max mem: 9377 +Train: [14] [4800/6250] eta: 0:03:29 lr: 0.000122 grad: 0.0646 (0.0655) loss: 0.8398 (0.8417) time: 0.1298 data: 0.0449 max mem: 9377 +Train: [14] [4900/6250] eta: 0:03:14 lr: 0.000122 grad: 0.0661 (0.0655) loss: 0.8398 (0.8417) time: 0.1522 data: 0.0695 max mem: 9377 +Train: [14] [5000/6250] eta: 0:02:59 lr: 0.000122 grad: 0.0608 (0.0655) loss: 0.8393 (0.8417) time: 0.1287 data: 0.0399 max mem: 9377 +Train: [14] [5100/6250] eta: 0:02:46 lr: 0.000122 grad: 0.0664 (0.0655) loss: 0.8380 (0.8416) time: 0.2084 data: 0.1273 max mem: 9377 +Train: [14] [5200/6250] eta: 0:02:31 lr: 0.000122 grad: 0.0631 (0.0655) loss: 0.8362 (0.8416) time: 0.1553 data: 0.0720 max mem: 9377 +Train: [14] [5300/6250] eta: 0:02:17 lr: 0.000122 grad: 0.0612 (0.0655) loss: 0.8445 (0.8415) time: 0.1810 data: 0.1003 max mem: 9377 +Train: [14] [5400/6250] eta: 0:02:03 lr: 0.000122 grad: 0.0603 (0.0654) loss: 0.8393 (0.8415) time: 0.1688 data: 0.0894 max mem: 9377 +Train: [14] [5500/6250] eta: 0:01:48 lr: 0.000122 grad: 0.0643 (0.0654) loss: 0.8373 (0.8414) time: 0.1602 data: 0.0792 max mem: 9377 +Train: [14] [5600/6250] eta: 0:01:34 lr: 0.000122 grad: 0.0597 (0.0654) loss: 0.8314 (0.8413) time: 0.1441 data: 0.0655 max mem: 9377 +Train: [14] [5700/6250] eta: 0:01:20 lr: 0.000122 grad: 0.0605 (0.0653) loss: 0.8398 (0.8413) time: 0.1617 data: 0.0878 max mem: 9377 +Train: [14] [5800/6250] eta: 0:01:05 lr: 0.000122 grad: 0.0634 (0.0653) loss: 0.8437 (0.8413) time: 0.1432 data: 0.0576 max mem: 9377 +Train: [14] [5900/6250] eta: 0:00:51 lr: 0.000122 grad: 0.0582 (0.0653) loss: 0.8412 (0.8413) time: 0.1431 data: 0.0602 max mem: 9377 +Train: [14] [6000/6250] eta: 0:00:36 lr: 0.000122 grad: 0.0619 (0.0653) loss: 0.8424 (0.8413) time: 0.1526 data: 0.0718 max mem: 9377 +Train: [14] [6100/6250] eta: 0:00:21 lr: 0.000122 grad: 0.0617 (0.0653) loss: 0.8412 (0.8413) time: 0.1493 data: 0.0683 max mem: 9377 +Train: [14] [6200/6250] eta: 0:00:07 lr: 0.000122 grad: 0.0611 (0.0653) loss: 0.8468 (0.8413) time: 0.1475 data: 0.0665 max mem: 9377 +Train: [14] [6249/6250] eta: 0:00:00 lr: 0.000122 grad: 0.0595 (0.0653) loss: 0.8388 (0.8413) time: 0.1373 data: 0.0594 max mem: 9377 +Train: [14] Total time: 0:15:19 (0.1471 s / it) +Averaged stats: lr: 0.000122 grad: 0.0595 (0.0653) loss: 0.8388 (0.8413) +Eval (hcp-train-subset): [14] [ 0/62] eta: 0:04:43 loss: 0.8424 (0.8424) time: 4.5657 data: 4.4613 max mem: 9377 +Eval (hcp-train-subset): [14] [61/62] eta: 0:00:00 loss: 0.8468 (0.8452) time: 0.1635 data: 0.1380 max mem: 9377 +Eval (hcp-train-subset): [14] Total time: 0:00:15 (0.2506 s / it) +Averaged stats (hcp-train-subset): loss: 0.8468 (0.8452) +Making plots (hcp-train-subset): example=34 +Eval (hcp-val): [14] [ 0/62] eta: 0:05:21 loss: 0.8415 (0.8415) time: 5.1795 data: 5.1464 max mem: 9377 +Eval (hcp-val): [14] [61/62] eta: 0:00:00 loss: 0.8440 (0.8444) time: 0.1346 data: 0.1087 max mem: 9377 +Eval (hcp-val): [14] Total time: 0:00:16 (0.2615 s / it) +Averaged stats (hcp-val): loss: 0.8440 (0.8444) +Making plots (hcp-val): example=4 +Eval (nsd-val): [14] [ 0/62] eta: 0:03:39 loss: 0.8047 (0.8047) time: 3.5327 data: 3.4554 max mem: 9377 +Eval (nsd-val): [14] [61/62] eta: 0:00:00 loss: 0.8117 (0.8129) time: 0.1516 data: 0.1260 max mem: 9377 +Eval (nsd-val): [14] Total time: 0:00:15 (0.2434 s / it) +Averaged stats (nsd-val): loss: 0.8117 (0.8129) +Making plots (nsd-val): example=11 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-00014.pth +Train: [15] [ 0/6250] eta: 12:40:34 lr: 0.000122 grad: 0.0640 (0.0640) loss: 0.8446 (0.8446) time: 7.3015 data: 7.1740 max mem: 9377 +Train: [15] [ 100/6250] eta: 0:24:08 lr: 0.000122 grad: 0.0652 (0.0714) loss: 0.8474 (0.8526) time: 0.1736 data: 0.0700 max mem: 9377 +Train: [15] [ 200/6250] eta: 0:20:37 lr: 0.000122 grad: 0.0608 (0.0705) loss: 0.8397 (0.8474) time: 0.1679 data: 0.0814 max mem: 9377 +Train: [15] [ 300/6250] eta: 0:19:08 lr: 0.000122 grad: 0.0644 (0.0692) loss: 0.8396 (0.8446) time: 0.1874 data: 0.1020 max mem: 9377 +Train: [15] [ 400/6250] eta: 0:17:44 lr: 0.000122 grad: 0.0641 (0.0695) loss: 0.8389 (0.8431) time: 0.1584 data: 0.0664 max mem: 9377 +Train: [15] [ 500/6250] eta: 0:17:03 lr: 0.000122 grad: 0.0637 (0.0690) loss: 0.8379 (0.8423) time: 0.1842 data: 0.1024 max mem: 9377 +Train: [15] [ 600/6250] eta: 0:16:22 lr: 0.000122 grad: 0.0637 (0.0685) loss: 0.8386 (0.8417) time: 0.1289 data: 0.0489 max mem: 9377 +Train: [15] [ 700/6250] eta: 0:15:50 lr: 0.000122 grad: 0.0639 (0.0683) loss: 0.8350 (0.8413) time: 0.1509 data: 0.0606 max mem: 9377 +Train: [15] [ 800/6250] eta: 0:15:23 lr: 0.000122 grad: 0.0615 (0.0681) loss: 0.8343 (0.8407) time: 0.1613 data: 0.0716 max mem: 9377 +Train: [15] [ 900/6250] eta: 0:14:56 lr: 0.000122 grad: 0.0583 (0.0678) loss: 0.8433 (0.8405) time: 0.1515 data: 0.0644 max mem: 9377 +Train: [15] [1000/6250] eta: 0:14:30 lr: 0.000122 grad: 0.0613 (0.0676) loss: 0.8446 (0.8403) time: 0.1548 data: 0.0689 max mem: 9377 +Train: [15] [1100/6250] eta: 0:14:03 lr: 0.000121 grad: 0.0638 (0.0673) loss: 0.8385 (0.8401) time: 0.1546 data: 0.0688 max mem: 9377 +Train: [15] [1200/6250] eta: 0:13:38 lr: 0.000121 grad: 0.0657 (0.0672) loss: 0.8391 (0.8400) time: 0.1438 data: 0.0601 max mem: 9377 +Train: [15] [1300/6250] eta: 0:13:17 lr: 0.000121 grad: 0.0583 (0.0673) loss: 0.8433 (0.8400) time: 0.1661 data: 0.0840 max mem: 9377 +Train: [15] [1400/6250] eta: 0:12:56 lr: 0.000121 grad: 0.0644 (0.0671) loss: 0.8393 (0.8399) time: 0.1522 data: 0.0661 max mem: 9377 +Train: [15] [1500/6250] eta: 0:12:35 lr: 0.000121 grad: 0.0632 (0.0669) loss: 0.8393 (0.8398) time: 0.1540 data: 0.0719 max mem: 9377 +Train: [15] [1600/6250] eta: 0:12:16 lr: 0.000121 grad: 0.0644 (0.0668) loss: 0.8350 (0.8397) time: 0.1512 data: 0.0674 max mem: 9377 +Train: [15] [1700/6250] eta: 0:11:57 lr: 0.000121 grad: 0.0648 (0.0667) loss: 0.8380 (0.8397) time: 0.1655 data: 0.0851 max mem: 9377 +Train: [15] [1800/6250] eta: 0:11:38 lr: 0.000121 grad: 0.0618 (0.0667) loss: 0.8384 (0.8398) time: 0.1495 data: 0.0676 max mem: 9377 +Train: [15] [1900/6250] eta: 0:11:19 lr: 0.000121 grad: 0.0638 (0.0666) loss: 0.8400 (0.8397) time: 0.1497 data: 0.0659 max mem: 9377 +Train: [15] [2000/6250] eta: 0:11:00 lr: 0.000121 grad: 0.0613 (0.0666) loss: 0.8393 (0.8397) time: 0.1401 data: 0.0561 max mem: 9377 +Train: [15] [2100/6250] eta: 0:10:42 lr: 0.000121 grad: 0.0635 (0.0665) loss: 0.8457 (0.8399) time: 0.1499 data: 0.0617 max mem: 9377 +Train: [15] [2200/6250] eta: 0:10:23 lr: 0.000121 grad: 0.0621 (0.0664) loss: 0.8412 (0.8399) time: 0.1415 data: 0.0619 max mem: 9377 +Train: [15] [2300/6250] eta: 0:10:07 lr: 0.000121 grad: 0.0597 (0.0664) loss: 0.8448 (0.8400) time: 0.1596 data: 0.0798 max mem: 9377 +Train: [15] [2400/6250] eta: 0:09:52 lr: 0.000121 grad: 0.0618 (0.0664) loss: 0.8429 (0.8402) time: 0.1747 data: 0.0883 max mem: 9377 +Train: [15] [2500/6250] eta: 0:09:34 lr: 0.000121 grad: 0.0593 (0.0662) loss: 0.8390 (0.8402) time: 0.1519 data: 0.0649 max mem: 9377 +Train: [15] [2600/6250] eta: 0:09:17 lr: 0.000121 grad: 0.0592 (0.0661) loss: 0.8409 (0.8403) time: 0.1501 data: 0.0733 max mem: 9377 +Train: [15] [2700/6250] eta: 0:09:01 lr: 0.000121 grad: 0.0600 (0.0660) loss: 0.8353 (0.8402) time: 0.1334 data: 0.0498 max mem: 9377 +Train: [15] [2800/6250] eta: 0:08:45 lr: 0.000121 grad: 0.0610 (0.0659) loss: 0.8395 (0.8403) time: 0.1434 data: 0.0564 max mem: 9377 +Train: [15] [2900/6250] eta: 0:08:30 lr: 0.000121 grad: 0.0606 (0.0659) loss: 0.8440 (0.8403) time: 0.1619 data: 0.0837 max mem: 9377 +Train: [15] [3000/6250] eta: 0:08:12 lr: 0.000121 grad: 0.0615 (0.0658) loss: 0.8401 (0.8403) time: 0.1462 data: 0.0688 max mem: 9377 +Train: [15] [3100/6250] eta: 0:07:58 lr: 0.000121 grad: 0.0628 (0.0657) loss: 0.8434 (0.8403) time: 0.2110 data: 0.1336 max mem: 9377 +Train: [15] [3200/6250] eta: 0:07:44 lr: 0.000121 grad: 0.0604 (0.0656) loss: 0.8431 (0.8403) time: 0.1660 data: 0.0841 max mem: 9377 +Train: [15] [3300/6250] eta: 0:07:29 lr: 0.000121 grad: 0.0601 (0.0655) loss: 0.8414 (0.8404) time: 0.1690 data: 0.0850 max mem: 9377 +Train: [15] [3400/6250] eta: 0:07:14 lr: 0.000121 grad: 0.0608 (0.0655) loss: 0.8419 (0.8404) time: 0.1473 data: 0.0640 max mem: 9377 +Train: [15] [3500/6250] eta: 0:06:59 lr: 0.000121 grad: 0.0666 (0.0654) loss: 0.8409 (0.8404) time: 0.1449 data: 0.0626 max mem: 9377 +Train: [15] [3600/6250] eta: 0:06:44 lr: 0.000121 grad: 0.0593 (0.0653) loss: 0.8426 (0.8404) time: 0.1698 data: 0.0942 max mem: 9377 +Train: [15] [3700/6250] eta: 0:06:30 lr: 0.000121 grad: 0.0609 (0.0652) loss: 0.8449 (0.8405) time: 0.1651 data: 0.0802 max mem: 9377 +Train: [15] [3800/6250] eta: 0:06:15 lr: 0.000121 grad: 0.0565 (0.0651) loss: 0.8453 (0.8405) time: 0.1463 data: 0.0681 max mem: 9377 +Train: [15] [3900/6250] eta: 0:05:59 lr: 0.000121 grad: 0.0611 (0.0650) loss: 0.8452 (0.8406) time: 0.1491 data: 0.0707 max mem: 9377 +Train: [15] [4000/6250] eta: 0:05:43 lr: 0.000121 grad: 0.0592 (0.0649) loss: 0.8444 (0.8407) time: 0.1455 data: 0.0637 max mem: 9377 +Train: [15] [4100/6250] eta: 0:05:28 lr: 0.000121 grad: 0.0568 (0.0648) loss: 0.8423 (0.8408) time: 0.1587 data: 0.0716 max mem: 9377 +Train: [15] [4200/6250] eta: 0:05:13 lr: 0.000121 grad: 0.0603 (0.0648) loss: 0.8411 (0.8408) time: 0.1239 data: 0.0281 max mem: 9377 +Train: [15] [4300/6250] eta: 0:04:57 lr: 0.000121 grad: 0.0582 (0.0648) loss: 0.8437 (0.8408) time: 0.1321 data: 0.0497 max mem: 9377 +Train: [15] [4400/6250] eta: 0:04:41 lr: 0.000121 grad: 0.0580 (0.0647) loss: 0.8438 (0.8409) time: 0.1255 data: 0.0359 max mem: 9377 +Train: [15] [4500/6250] eta: 0:04:26 lr: 0.000121 grad: 0.0584 (0.0646) loss: 0.8415 (0.8409) time: 0.1567 data: 0.0787 max mem: 9377 +Train: [15] [4600/6250] eta: 0:04:10 lr: 0.000121 grad: 0.0566 (0.0646) loss: 0.8377 (0.8409) time: 0.1197 data: 0.0431 max mem: 9377 +Train: [15] [4700/6250] eta: 0:03:55 lr: 0.000121 grad: 0.0609 (0.0645) loss: 0.8421 (0.8409) time: 0.1432 data: 0.0600 max mem: 9377 +Train: [15] [4800/6250] eta: 0:03:41 lr: 0.000121 grad: 0.0581 (0.0645) loss: 0.8415 (0.8409) time: 0.1971 data: 0.1166 max mem: 9377 +Train: [15] [4900/6250] eta: 0:03:26 lr: 0.000121 grad: 0.0578 (0.0644) loss: 0.8409 (0.8409) time: 0.1743 data: 0.0957 max mem: 9377 +Train: [15] [5000/6250] eta: 0:03:11 lr: 0.000121 grad: 0.0581 (0.0643) loss: 0.8424 (0.8409) time: 0.1390 data: 0.0647 max mem: 9377 +Train: [15] [5100/6250] eta: 0:02:56 lr: 0.000121 grad: 0.0571 (0.0643) loss: 0.8429 (0.8410) time: 0.1670 data: 0.0923 max mem: 9377 +Train: [15] [5200/6250] eta: 0:02:40 lr: 0.000121 grad: 0.0610 (0.0642) loss: 0.8431 (0.8410) time: 0.1663 data: 0.0910 max mem: 9377 +Train: [15] [5300/6250] eta: 0:02:25 lr: 0.000121 grad: 0.0603 (0.0642) loss: 0.8432 (0.8411) time: 0.1618 data: 0.0827 max mem: 9377 +Train: [15] [5400/6250] eta: 0:02:10 lr: 0.000121 grad: 0.0587 (0.0642) loss: 0.8459 (0.8411) time: 0.1581 data: 0.0781 max mem: 9377 +Train: [15] [5500/6250] eta: 0:01:54 lr: 0.000121 grad: 0.0588 (0.0641) loss: 0.8422 (0.8411) time: 0.1316 data: 0.0523 max mem: 9377 +Train: [15] [5600/6250] eta: 0:01:39 lr: 0.000121 grad: 0.0569 (0.0641) loss: 0.8428 (0.8412) time: 0.1533 data: 0.0751 max mem: 9377 +Train: [15] [5700/6250] eta: 0:01:23 lr: 0.000121 grad: 0.0600 (0.0641) loss: 0.8428 (0.8412) time: 0.1436 data: 0.0605 max mem: 9377 +Train: [15] [5800/6250] eta: 0:01:08 lr: 0.000121 grad: 0.0634 (0.0641) loss: 0.8389 (0.8412) time: 0.1374 data: 0.0543 max mem: 9377 +Train: [15] [5900/6250] eta: 0:00:53 lr: 0.000121 grad: 0.0614 (0.0641) loss: 0.8395 (0.8412) time: 0.1425 data: 0.0625 max mem: 9377 +Train: [15] [6000/6250] eta: 0:00:38 lr: 0.000121 grad: 0.0627 (0.0641) loss: 0.8409 (0.8412) time: 0.1397 data: 0.0595 max mem: 9377 +Train: [15] [6100/6250] eta: 0:00:22 lr: 0.000121 grad: 0.0655 (0.0642) loss: 0.8402 (0.8411) time: 0.1349 data: 0.0544 max mem: 9377 +Train: [15] [6200/6250] eta: 0:00:07 lr: 0.000121 grad: 0.0609 (0.0642) loss: 0.8394 (0.8411) time: 0.1223 data: 0.0450 max mem: 9377 +Train: [15] [6249/6250] eta: 0:00:00 lr: 0.000121 grad: 0.0655 (0.0643) loss: 0.8427 (0.8411) time: 0.1462 data: 0.0671 max mem: 9377 +Train: [15] Total time: 0:15:51 (0.1523 s / it) +Averaged stats: lr: 0.000121 grad: 0.0655 (0.0643) loss: 0.8427 (0.8411) +Eval (hcp-train-subset): [15] [ 0/62] eta: 0:04:01 loss: 0.8469 (0.8469) time: 3.9007 data: 3.8116 max mem: 9377 +Eval (hcp-train-subset): [15] [61/62] eta: 0:00:00 loss: 0.8425 (0.8439) time: 0.1207 data: 0.0930 max mem: 9377 +Eval (hcp-train-subset): [15] Total time: 0:00:13 (0.2243 s / it) +Averaged stats (hcp-train-subset): loss: 0.8425 (0.8439) +Eval (hcp-val): [15] [ 0/62] eta: 0:04:27 loss: 0.8432 (0.8432) time: 4.3213 data: 4.2910 max mem: 9377 +Eval (hcp-val): [15] [61/62] eta: 0:00:00 loss: 0.8438 (0.8436) time: 0.1305 data: 0.1050 max mem: 9377 +Eval (hcp-val): [15] Total time: 0:00:13 (0.2111 s / it) +Averaged stats (hcp-val): loss: 0.8438 (0.8436) +Eval (nsd-val): [15] [ 0/62] eta: 0:03:14 loss: 0.7994 (0.7994) time: 3.1354 data: 3.0603 max mem: 9377 +Eval (nsd-val): [15] [61/62] eta: 0:00:00 loss: 0.8095 (0.8122) time: 0.1399 data: 0.1145 max mem: 9377 +Eval (nsd-val): [15] Total time: 0:00:13 (0.2108 s / it) +Averaged stats (nsd-val): loss: 0.8095 (0.8122) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +Train: [16] [ 0/6250] eta: 9:22:55 lr: 0.000121 grad: 0.1102 (0.1102) loss: 0.8327 (0.8327) time: 5.4040 data: 5.3071 max mem: 9377 +Train: [16] [ 100/6250] eta: 0:21:16 lr: 0.000121 grad: 0.0702 (0.0678) loss: 0.8391 (0.8494) time: 0.1756 data: 0.0961 max mem: 9377 +Train: [16] [ 200/6250] eta: 0:19:04 lr: 0.000121 grad: 0.0654 (0.0673) loss: 0.8327 (0.8435) time: 0.1689 data: 0.0713 max mem: 9377 +Train: [16] [ 300/6250] eta: 0:17:59 lr: 0.000121 grad: 0.0622 (0.0671) loss: 0.8438 (0.8413) time: 0.1686 data: 0.0785 max mem: 9377 +Train: [16] [ 400/6250] eta: 0:17:00 lr: 0.000121 grad: 0.0625 (0.0662) loss: 0.8389 (0.8415) time: 0.1841 data: 0.0951 max mem: 9377 +Train: [16] [ 500/6250] eta: 0:16:07 lr: 0.000121 grad: 0.0591 (0.0650) loss: 0.8402 (0.8414) time: 0.1494 data: 0.0634 max mem: 9377 +Train: [16] [ 600/6250] eta: 0:15:25 lr: 0.000121 grad: 0.0638 (0.0648) loss: 0.8365 (0.8415) time: 0.1272 data: 0.0385 max mem: 9377 +Train: [16] [ 700/6250] eta: 0:14:54 lr: 0.000121 grad: 0.0686 (0.0647) loss: 0.8399 (0.8415) time: 0.1552 data: 0.0747 max mem: 9377 +Train: [16] [ 800/6250] eta: 0:14:32 lr: 0.000121 grad: 0.0596 (0.0644) loss: 0.8424 (0.8418) time: 0.1476 data: 0.0626 max mem: 9377 +Train: [16] [ 900/6250] eta: 0:14:12 lr: 0.000121 grad: 0.0647 (0.0642) loss: 0.8424 (0.8419) time: 0.1474 data: 0.0658 max mem: 9377 +Train: [16] [1000/6250] eta: 0:13:51 lr: 0.000121 grad: 0.0589 (0.0640) loss: 0.8459 (0.8421) time: 0.1391 data: 0.0551 max mem: 9377 +Train: [16] [1100/6250] eta: 0:13:30 lr: 0.000121 grad: 0.0584 (0.0640) loss: 0.8418 (0.8422) time: 0.1363 data: 0.0471 max mem: 9377 +Train: [16] [1200/6250] eta: 0:13:07 lr: 0.000121 grad: 0.0615 (0.0639) loss: 0.8432 (0.8422) time: 0.1405 data: 0.0620 max mem: 9377 +Train: [16] [1300/6250] eta: 0:12:44 lr: 0.000121 grad: 0.0625 (0.0637) loss: 0.8430 (0.8423) time: 0.1290 data: 0.0466 max mem: 9377 +Train: [16] [1400/6250] eta: 0:12:23 lr: 0.000121 grad: 0.0601 (0.0637) loss: 0.8427 (0.8425) time: 0.1236 data: 0.0363 max mem: 9377 +Train: [16] [1500/6250] eta: 0:12:02 lr: 0.000121 grad: 0.0637 (0.0636) loss: 0.8452 (0.8427) time: 0.1434 data: 0.0562 max mem: 9377 +Train: [16] [1600/6250] eta: 0:11:43 lr: 0.000121 grad: 0.0598 (0.0635) loss: 0.8446 (0.8428) time: 0.1456 data: 0.0631 max mem: 9377 +Train: [16] [1700/6250] eta: 0:11:25 lr: 0.000121 grad: 0.0638 (0.0635) loss: 0.8440 (0.8429) time: 0.1483 data: 0.0625 max mem: 9377 +Train: [16] [1800/6250] eta: 0:11:07 lr: 0.000121 grad: 0.0615 (0.0634) loss: 0.8393 (0.8429) time: 0.1235 data: 0.0344 max mem: 9377 +Train: [16] [1900/6250] eta: 0:10:51 lr: 0.000121 grad: 0.0612 (0.0633) loss: 0.8474 (0.8430) time: 0.1342 data: 0.0468 max mem: 9377 +Train: [16] [2000/6250] eta: 0:10:34 lr: 0.000121 grad: 0.0593 (0.0631) loss: 0.8466 (0.8431) time: 0.1513 data: 0.0706 max mem: 9377 +Train: [16] [2100/6250] eta: 0:10:17 lr: 0.000121 grad: 0.0599 (0.0630) loss: 0.8443 (0.8431) time: 0.1254 data: 0.0469 max mem: 9377 +Train: [16] [2200/6250] eta: 0:10:01 lr: 0.000121 grad: 0.0586 (0.0631) loss: 0.8428 (0.8432) time: 0.1572 data: 0.0707 max mem: 9377 +Train: [16] [2300/6250] eta: 0:09:45 lr: 0.000121 grad: 0.0622 (0.0631) loss: 0.8420 (0.8432) time: 0.1658 data: 0.0807 max mem: 9377 +Train: [16] [2400/6250] eta: 0:09:28 lr: 0.000121 grad: 0.0609 (0.0631) loss: 0.8479 (0.8432) time: 0.1149 data: 0.0349 max mem: 9377 +Train: [16] [2500/6250] eta: 0:09:14 lr: 0.000121 grad: 0.0592 (0.0631) loss: 0.8467 (0.8433) time: 0.1549 data: 0.0694 max mem: 9377 +Train: [16] [2600/6250] eta: 0:08:58 lr: 0.000121 grad: 0.0622 (0.0630) loss: 0.8467 (0.8433) time: 0.1568 data: 0.0779 max mem: 9377 +Train: [16] [2700/6250] eta: 0:08:42 lr: 0.000121 grad: 0.0601 (0.0631) loss: 0.8395 (0.8433) time: 0.1356 data: 0.0517 max mem: 9377 +Train: [16] [2800/6250] eta: 0:08:27 lr: 0.000121 grad: 0.0620 (0.0631) loss: 0.8380 (0.8433) time: 0.1547 data: 0.0714 max mem: 9377 +Train: [16] [2900/6250] eta: 0:08:15 lr: 0.000121 grad: 0.0658 (0.0631) loss: 0.8399 (0.8433) time: 0.1792 data: 0.0928 max mem: 9377 +Train: [16] [3000/6250] eta: 0:08:01 lr: 0.000121 grad: 0.0611 (0.0632) loss: 0.8400 (0.8432) time: 0.1569 data: 0.0783 max mem: 9377 +Train: [16] [3100/6250] eta: 0:07:47 lr: 0.000121 grad: 0.0620 (0.0632) loss: 0.8410 (0.8431) time: 0.1562 data: 0.0747 max mem: 9377 +Train: [16] [3200/6250] eta: 0:07:32 lr: 0.000121 grad: 0.0615 (0.0632) loss: 0.8406 (0.8430) time: 0.1350 data: 0.0585 max mem: 9377 +Train: [16] [3300/6250] eta: 0:07:18 lr: 0.000121 grad: 0.0637 (0.0632) loss: 0.8429 (0.8429) time: 0.1419 data: 0.0595 max mem: 9377 +Train: [16] [3400/6250] eta: 0:07:03 lr: 0.000121 grad: 0.0622 (0.0633) loss: 0.8416 (0.8428) time: 0.1509 data: 0.0652 max mem: 9377 +Train: [16] [3500/6250] eta: 0:06:48 lr: 0.000120 grad: 0.0601 (0.0633) loss: 0.8384 (0.8427) time: 0.1550 data: 0.0697 max mem: 9377 +Train: [16] [3600/6250] eta: 0:06:33 lr: 0.000120 grad: 0.0617 (0.0633) loss: 0.8367 (0.8427) time: 0.1498 data: 0.0697 max mem: 9377 +Train: [16] [3700/6250] eta: 0:06:19 lr: 0.000120 grad: 0.0631 (0.0633) loss: 0.8347 (0.8426) time: 0.1374 data: 0.0598 max mem: 9377 +Train: [16] [3800/6250] eta: 0:06:03 lr: 0.000120 grad: 0.0615 (0.0633) loss: 0.8374 (0.8424) time: 0.1230 data: 0.0426 max mem: 9377 +Train: [16] [3900/6250] eta: 0:05:48 lr: 0.000120 grad: 0.0587 (0.0633) loss: 0.8359 (0.8423) time: 0.1366 data: 0.0510 max mem: 9377 +Train: [16] [4000/6250] eta: 0:05:33 lr: 0.000120 grad: 0.0644 (0.0633) loss: 0.8408 (0.8423) time: 0.1442 data: 0.0652 max mem: 9377 +Train: [16] [4100/6250] eta: 0:05:18 lr: 0.000120 grad: 0.0674 (0.0634) loss: 0.8388 (0.8421) time: 0.1483 data: 0.0645 max mem: 9377 +Train: [16] [4200/6250] eta: 0:05:03 lr: 0.000120 grad: 0.0635 (0.0634) loss: 0.8425 (0.8421) time: 0.1599 data: 0.0716 max mem: 9377 +Train: [16] [4300/6250] eta: 0:04:47 lr: 0.000120 grad: 0.0640 (0.0634) loss: 0.8420 (0.8420) time: 0.1429 data: 0.0611 max mem: 9377 +Train: [16] [4400/6250] eta: 0:04:32 lr: 0.000120 grad: 0.0664 (0.0635) loss: 0.8391 (0.8420) time: 0.1460 data: 0.0570 max mem: 9377 +Train: [16] [4500/6250] eta: 0:04:17 lr: 0.000120 grad: 0.0652 (0.0635) loss: 0.8417 (0.8419) time: 0.1359 data: 0.0464 max mem: 9377 +Train: [16] [4600/6250] eta: 0:04:02 lr: 0.000120 grad: 0.0684 (0.0636) loss: 0.8387 (0.8418) time: 0.1163 data: 0.0343 max mem: 9377 +Train: [16] [4700/6250] eta: 0:03:47 lr: 0.000120 grad: 0.0643 (0.0637) loss: 0.8430 (0.8418) time: 0.1564 data: 0.0780 max mem: 9377 +Train: [16] [4800/6250] eta: 0:03:32 lr: 0.000120 grad: 0.0640 (0.0637) loss: 0.8393 (0.8417) time: 0.1445 data: 0.0638 max mem: 9377 +Train: [16] [4900/6250] eta: 0:03:17 lr: 0.000120 grad: 0.0629 (0.0639) loss: 0.8391 (0.8416) time: 0.1302 data: 0.0496 max mem: 9377 +Train: [16] [5000/6250] eta: 0:03:03 lr: 0.000120 grad: 0.0673 (0.0640) loss: 0.8383 (0.8416) time: 0.1282 data: 0.0419 max mem: 9377 +Train: [16] [5100/6250] eta: 0:02:48 lr: 0.000120 grad: 0.0650 (0.0641) loss: 0.8388 (0.8416) time: 0.1612 data: 0.0827 max mem: 9377 +Train: [16] [5200/6250] eta: 0:02:34 lr: 0.000120 grad: 0.0625 (0.0641) loss: 0.8434 (0.8416) time: 0.1537 data: 0.0696 max mem: 9377 +Train: [16] [5300/6250] eta: 0:02:19 lr: 0.000120 grad: 0.0660 (0.0643) loss: 0.8399 (0.8415) time: 0.1677 data: 0.0901 max mem: 9377 +Train: [16] [5400/6250] eta: 0:02:04 lr: 0.000120 grad: 0.0695 (0.0644) loss: 0.8372 (0.8415) time: 0.1588 data: 0.0791 max mem: 9377 +Train: [16] [5500/6250] eta: 0:01:50 lr: 0.000120 grad: 0.0635 (0.0644) loss: 0.8418 (0.8415) time: 0.1577 data: 0.0807 max mem: 9377 +Train: [16] [5600/6250] eta: 0:01:35 lr: 0.000120 grad: 0.0655 (0.0645) loss: 0.8436 (0.8414) time: 0.1476 data: 0.0600 max mem: 9377 +Train: [16] [5700/6250] eta: 0:01:20 lr: 0.000120 grad: 0.0635 (0.0645) loss: 0.8357 (0.8414) time: 0.1338 data: 0.0519 max mem: 9377 +Train: [16] [5800/6250] eta: 0:01:06 lr: 0.000120 grad: 0.0658 (0.0646) loss: 0.8402 (0.8414) time: 0.1593 data: 0.0721 max mem: 9377 +Train: [16] [5900/6250] eta: 0:00:51 lr: 0.000120 grad: 0.0592 (0.0646) loss: 0.8387 (0.8414) time: 0.1482 data: 0.0657 max mem: 9377 +Train: [16] [6000/6250] eta: 0:00:36 lr: 0.000120 grad: 0.0624 (0.0646) loss: 0.8430 (0.8414) time: 0.1583 data: 0.0806 max mem: 9377 +Train: [16] [6100/6250] eta: 0:00:22 lr: 0.000120 grad: 0.0633 (0.0646) loss: 0.8415 (0.8414) time: 0.1330 data: 0.0431 max mem: 9377 +Train: [16] [6200/6250] eta: 0:00:07 lr: 0.000120 grad: 0.0626 (0.0647) loss: 0.8370 (0.8413) time: 0.1669 data: 0.0863 max mem: 9377 +Train: [16] [6249/6250] eta: 0:00:00 lr: 0.000120 grad: 0.0646 (0.0647) loss: 0.8363 (0.8413) time: 0.1553 data: 0.0744 max mem: 9377 +Train: [16] Total time: 0:15:24 (0.1479 s / it) +Averaged stats: lr: 0.000120 grad: 0.0646 (0.0647) loss: 0.8363 (0.8413) +Eval (hcp-train-subset): [16] [ 0/62] eta: 0:05:31 loss: 0.8451 (0.8451) time: 5.3462 data: 5.3101 max mem: 9377 +Eval (hcp-train-subset): [16] [61/62] eta: 0:00:00 loss: 0.8421 (0.8435) time: 0.1202 data: 0.0950 max mem: 9377 +Eval (hcp-train-subset): [16] Total time: 0:00:13 (0.2245 s / it) +Averaged stats (hcp-train-subset): loss: 0.8421 (0.8435) +Eval (hcp-val): [16] [ 0/62] eta: 0:03:16 loss: 0.8380 (0.8380) time: 3.1630 data: 3.1039 max mem: 9377 +Eval (hcp-val): [16] [61/62] eta: 0:00:00 loss: 0.8425 (0.8430) time: 0.1298 data: 0.1046 max mem: 9377 +Eval (hcp-val): [16] Total time: 0:00:13 (0.2147 s / it) +Averaged stats (hcp-val): loss: 0.8425 (0.8430) +Eval (nsd-val): [16] [ 0/62] eta: 0:03:25 loss: 0.8005 (0.8005) time: 3.3149 data: 3.2450 max mem: 9377 +Eval (nsd-val): [16] [61/62] eta: 0:00:00 loss: 0.8140 (0.8138) time: 0.1345 data: 0.1089 max mem: 9377 +Eval (nsd-val): [16] Total time: 0:00:12 (0.2058 s / it) +Averaged stats (nsd-val): loss: 0.8140 (0.8138) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +Train: [17] [ 0/6250] eta: 9:02:48 lr: 0.000120 grad: 0.0527 (0.0527) loss: 0.8665 (0.8665) time: 5.2110 data: 5.0717 max mem: 9377 +Train: [17] [ 100/6250] eta: 0:19:25 lr: 0.000120 grad: 0.0633 (0.0663) loss: 0.8459 (0.8474) time: 0.1346 data: 0.0492 max mem: 9377 +Train: [17] [ 200/6250] eta: 0:16:40 lr: 0.000120 grad: 0.0574 (0.0652) loss: 0.8454 (0.8448) time: 0.1321 data: 0.0378 max mem: 9377 +Train: [17] [ 300/6250] eta: 0:15:29 lr: 0.000120 grad: 0.0605 (0.0656) loss: 0.8380 (0.8432) time: 0.1262 data: 0.0365 max mem: 9377 +Train: [17] [ 400/6250] eta: 0:14:45 lr: 0.000120 grad: 0.0679 (0.0657) loss: 0.8387 (0.8414) time: 0.1394 data: 0.0575 max mem: 9377 +Train: [17] [ 500/6250] eta: 0:14:09 lr: 0.000120 grad: 0.0659 (0.0657) loss: 0.8412 (0.8401) time: 0.1411 data: 0.0472 max mem: 9377 +Train: [17] [ 600/6250] eta: 0:13:44 lr: 0.000120 grad: 0.0645 (0.0655) loss: 0.8386 (0.8397) time: 0.1726 data: 0.0891 max mem: 9377 +Train: [17] [ 700/6250] eta: 0:13:21 lr: 0.000120 grad: 0.0612 (0.0651) loss: 0.8380 (0.8394) time: 0.1199 data: 0.0343 max mem: 9377 +Train: [17] [ 800/6250] eta: 0:13:06 lr: 0.000120 grad: 0.0657 (0.0651) loss: 0.8396 (0.8393) time: 0.1390 data: 0.0506 max mem: 9377 +Train: [17] [ 900/6250] eta: 0:12:53 lr: 0.000120 grad: 0.0637 (0.0650) loss: 0.8318 (0.8390) time: 0.1555 data: 0.0642 max mem: 9377 +Train: [17] [1000/6250] eta: 0:12:34 lr: 0.000120 grad: 0.0653 (0.0648) loss: 0.8353 (0.8389) time: 0.1220 data: 0.0387 max mem: 9377 +Train: [17] [1100/6250] eta: 0:12:20 lr: 0.000120 grad: 0.0655 (0.0651) loss: 0.8351 (0.8387) time: 0.1479 data: 0.0689 max mem: 9377 +Train: [17] [1200/6250] eta: 0:12:03 lr: 0.000120 grad: 0.0655 (0.0652) loss: 0.8414 (0.8384) time: 0.1486 data: 0.0661 max mem: 9377 +Train: [17] [1300/6250] eta: 0:11:47 lr: 0.000120 grad: 0.0645 (0.0651) loss: 0.8329 (0.8382) time: 0.1392 data: 0.0566 max mem: 9377 +Train: [17] [1400/6250] eta: 0:11:34 lr: 0.000120 grad: 0.0621 (0.0653) loss: 0.8325 (0.8381) time: 0.1348 data: 0.0546 max mem: 9377 +Train: [17] [1500/6250] eta: 0:11:19 lr: 0.000120 grad: 0.0689 (0.0655) loss: 0.8345 (0.8379) time: 0.1454 data: 0.0667 max mem: 9377 +Train: [17] [1600/6250] eta: 0:11:04 lr: 0.000120 grad: 0.0597 (0.0654) loss: 0.8396 (0.8379) time: 0.1455 data: 0.0638 max mem: 9377 +Train: [17] [1700/6250] eta: 0:10:47 lr: 0.000120 grad: 0.0609 (0.0654) loss: 0.8353 (0.8379) time: 0.1459 data: 0.0639 max mem: 9377 +Train: [17] [1800/6250] eta: 0:10:31 lr: 0.000120 grad: 0.0703 (0.0656) loss: 0.8377 (0.8378) time: 0.1346 data: 0.0477 max mem: 9377 +Train: [17] [1900/6250] eta: 0:10:16 lr: 0.000120 grad: 0.0626 (0.0654) loss: 0.8341 (0.8378) time: 0.1617 data: 0.0840 max mem: 9377 +Train: [17] [2000/6250] eta: 0:10:02 lr: 0.000120 grad: 0.0588 (0.0653) loss: 0.8423 (0.8378) time: 0.1304 data: 0.0471 max mem: 9377 +Train: [17] [2100/6250] eta: 0:09:47 lr: 0.000120 grad: 0.0611 (0.0652) loss: 0.8398 (0.8379) time: 0.1334 data: 0.0477 max mem: 9377 +Train: [17] [2200/6250] eta: 0:09:33 lr: 0.000120 grad: 0.0578 (0.0652) loss: 0.8427 (0.8380) time: 0.1301 data: 0.0434 max mem: 9377 +Train: [17] [2300/6250] eta: 0:09:19 lr: 0.000120 grad: 0.0624 (0.0651) loss: 0.8369 (0.8380) time: 0.1493 data: 0.0675 max mem: 9377 +Train: [17] [2400/6250] eta: 0:09:04 lr: 0.000120 grad: 0.0621 (0.0651) loss: 0.8405 (0.8381) time: 0.1264 data: 0.0385 max mem: 9377 +Train: [17] [2500/6250] eta: 0:08:49 lr: 0.000120 grad: 0.0631 (0.0650) loss: 0.8369 (0.8381) time: 0.1444 data: 0.0643 max mem: 9377 +Train: [17] [2600/6250] eta: 0:08:34 lr: 0.000120 grad: 0.0649 (0.0651) loss: 0.8389 (0.8382) time: 0.1319 data: 0.0505 max mem: 9377 +Train: [17] [2700/6250] eta: 0:08:20 lr: 0.000120 grad: 0.0687 (0.0651) loss: 0.8355 (0.8382) time: 0.1372 data: 0.0623 max mem: 9377 +Train: [17] [2800/6250] eta: 0:08:08 lr: 0.000120 grad: 0.0626 (0.0650) loss: 0.8321 (0.8382) time: 0.1735 data: 0.0956 max mem: 9377 +Train: [17] [2900/6250] eta: 0:07:55 lr: 0.000120 grad: 0.0618 (0.0650) loss: 0.8398 (0.8383) time: 0.1382 data: 0.0532 max mem: 9377 +Train: [17] [3000/6250] eta: 0:07:42 lr: 0.000120 grad: 0.0644 (0.0650) loss: 0.8390 (0.8382) time: 0.1555 data: 0.0661 max mem: 9377 +Train: [17] [3100/6250] eta: 0:07:28 lr: 0.000120 grad: 0.0600 (0.0650) loss: 0.8406 (0.8383) time: 0.1361 data: 0.0536 max mem: 9377 +Train: [17] [3200/6250] eta: 0:07:15 lr: 0.000120 grad: 0.0602 (0.0650) loss: 0.8412 (0.8384) time: 0.1638 data: 0.0819 max mem: 9377 +Train: [17] [3300/6250] eta: 0:07:01 lr: 0.000120 grad: 0.0633 (0.0650) loss: 0.8433 (0.8385) time: 0.1434 data: 0.0590 max mem: 9377 +Train: [17] [3400/6250] eta: 0:06:47 lr: 0.000120 grad: 0.0675 (0.0650) loss: 0.8405 (0.8385) time: 0.1474 data: 0.0608 max mem: 9377 +Train: [17] [3500/6250] eta: 0:06:32 lr: 0.000120 grad: 0.0601 (0.0650) loss: 0.8430 (0.8386) time: 0.1445 data: 0.0580 max mem: 9377 +Train: [17] [3600/6250] eta: 0:06:20 lr: 0.000120 grad: 0.0631 (0.0650) loss: 0.8371 (0.8386) time: 0.1677 data: 0.0885 max mem: 9377 +Train: [17] [3700/6250] eta: 0:06:06 lr: 0.000120 grad: 0.0669 (0.0649) loss: 0.8342 (0.8386) time: 0.1489 data: 0.0671 max mem: 9377 +Train: [17] [3800/6250] eta: 0:05:51 lr: 0.000120 grad: 0.0599 (0.0649) loss: 0.8388 (0.8387) time: 0.1487 data: 0.0701 max mem: 9377 +Train: [17] [3900/6250] eta: 0:05:37 lr: 0.000120 grad: 0.0588 (0.0648) loss: 0.8399 (0.8388) time: 0.1710 data: 0.0837 max mem: 9377 +Train: [17] [4000/6250] eta: 0:05:23 lr: 0.000120 grad: 0.0650 (0.0649) loss: 0.8454 (0.8389) time: 0.1388 data: 0.0488 max mem: 9377 +Train: [17] [4100/6250] eta: 0:05:08 lr: 0.000120 grad: 0.0595 (0.0648) loss: 0.8421 (0.8390) time: 0.1398 data: 0.0579 max mem: 9377 +Train: [17] [4200/6250] eta: 0:04:54 lr: 0.000120 grad: 0.0608 (0.0648) loss: 0.8490 (0.8391) time: 0.1591 data: 0.0685 max mem: 9377 +Train: [17] [4300/6250] eta: 0:04:39 lr: 0.000120 grad: 0.0654 (0.0648) loss: 0.8439 (0.8392) time: 0.1338 data: 0.0470 max mem: 9377 +Train: [17] [4400/6250] eta: 0:04:25 lr: 0.000120 grad: 0.0594 (0.0648) loss: 0.8394 (0.8392) time: 0.1411 data: 0.0578 max mem: 9377 +Train: [17] [4500/6250] eta: 0:04:10 lr: 0.000120 grad: 0.0606 (0.0647) loss: 0.8463 (0.8393) time: 0.1193 data: 0.0360 max mem: 9377 +Train: [17] [4600/6250] eta: 0:03:56 lr: 0.000120 grad: 0.0606 (0.0647) loss: 0.8398 (0.8393) time: 0.1078 data: 0.0225 max mem: 9377 +Train: [17] [4700/6250] eta: 0:03:41 lr: 0.000120 grad: 0.0655 (0.0647) loss: 0.8428 (0.8393) time: 0.1448 data: 0.0626 max mem: 9377 +Train: [17] [4800/6250] eta: 0:03:27 lr: 0.000120 grad: 0.0639 (0.0646) loss: 0.8351 (0.8393) time: 0.1182 data: 0.0254 max mem: 9377 +Train: [17] [4900/6250] eta: 0:03:13 lr: 0.000119 grad: 0.0606 (0.0646) loss: 0.8435 (0.8393) time: 0.1465 data: 0.0632 max mem: 9377 +Train: [17] [5000/6250] eta: 0:02:58 lr: 0.000119 grad: 0.0589 (0.0646) loss: 0.8407 (0.8394) time: 0.1373 data: 0.0560 max mem: 9377 +Train: [17] [5100/6250] eta: 0:02:44 lr: 0.000119 grad: 0.0619 (0.0645) loss: 0.8374 (0.8394) time: 0.1460 data: 0.0647 max mem: 9377 +Train: [17] [5200/6250] eta: 0:02:30 lr: 0.000119 grad: 0.0592 (0.0645) loss: 0.8374 (0.8394) time: 0.1354 data: 0.0474 max mem: 9377 +Train: [17] [5300/6250] eta: 0:02:15 lr: 0.000119 grad: 0.0573 (0.0644) loss: 0.8347 (0.8394) time: 0.1404 data: 0.0580 max mem: 9377 +Train: [17] [5400/6250] eta: 0:02:01 lr: 0.000119 grad: 0.0632 (0.0644) loss: 0.8427 (0.8394) time: 0.1325 data: 0.0470 max mem: 9377 +Train: [17] [5500/6250] eta: 0:01:47 lr: 0.000119 grad: 0.0631 (0.0645) loss: 0.8374 (0.8394) time: 0.1347 data: 0.0500 max mem: 9377 +Train: [17] [5600/6250] eta: 0:01:32 lr: 0.000119 grad: 0.0644 (0.0645) loss: 0.8346 (0.8393) time: 0.1468 data: 0.0623 max mem: 9377 +Train: [17] [5700/6250] eta: 0:01:18 lr: 0.000119 grad: 0.0603 (0.0645) loss: 0.8384 (0.8393) time: 0.1377 data: 0.0560 max mem: 9377 +Train: [17] [5800/6250] eta: 0:01:04 lr: 0.000119 grad: 0.0659 (0.0645) loss: 0.8368 (0.8392) time: 0.1871 data: 0.1133 max mem: 9377 +Train: [17] [5900/6250] eta: 0:00:50 lr: 0.000119 grad: 0.0623 (0.0645) loss: 0.8361 (0.8392) time: 0.1730 data: 0.0934 max mem: 9377 +Train: [17] [6000/6250] eta: 0:00:35 lr: 0.000119 grad: 0.0647 (0.0645) loss: 0.8409 (0.8392) time: 0.1997 data: 0.1232 max mem: 9377 +Train: [17] [6100/6250] eta: 0:00:21 lr: 0.000119 grad: 0.0644 (0.0645) loss: 0.8352 (0.8392) time: 0.1641 data: 0.0842 max mem: 9377 +Train: [17] [6200/6250] eta: 0:00:07 lr: 0.000119 grad: 0.0655 (0.0645) loss: 0.8360 (0.8391) time: 0.1349 data: 0.0550 max mem: 9377 +Train: [17] [6249/6250] eta: 0:00:00 lr: 0.000119 grad: 0.0616 (0.0645) loss: 0.8384 (0.8391) time: 0.1530 data: 0.0813 max mem: 9377 +Train: [17] Total time: 0:15:04 (0.1447 s / it) +Averaged stats: lr: 0.000119 grad: 0.0616 (0.0645) loss: 0.8384 (0.8391) +Eval (hcp-train-subset): [17] [ 0/62] eta: 0:05:19 loss: 0.8441 (0.8441) time: 5.1571 data: 5.1262 max mem: 9377 +Eval (hcp-train-subset): [17] [61/62] eta: 0:00:00 loss: 0.8432 (0.8419) time: 0.1542 data: 0.1288 max mem: 9377 +Eval (hcp-train-subset): [17] Total time: 0:00:14 (0.2371 s / it) +Averaged stats (hcp-train-subset): loss: 0.8432 (0.8419) +Eval (hcp-val): [17] [ 0/62] eta: 0:04:14 loss: 0.8404 (0.8404) time: 4.1065 data: 4.0562 max mem: 9377 +Eval (hcp-val): [17] [61/62] eta: 0:00:00 loss: 0.8423 (0.8419) time: 0.1118 data: 0.0862 max mem: 9377 +Eval (hcp-val): [17] Total time: 0:00:12 (0.2063 s / it) +Averaged stats (hcp-val): loss: 0.8423 (0.8419) +Eval (nsd-val): [17] [ 0/62] eta: 0:03:45 loss: 0.8042 (0.8042) time: 3.6435 data: 3.5717 max mem: 9377 +Eval (nsd-val): [17] [61/62] eta: 0:00:00 loss: 0.8113 (0.8139) time: 0.1379 data: 0.1125 max mem: 9377 +Eval (nsd-val): [17] Total time: 0:00:13 (0.2142 s / it) +Averaged stats (nsd-val): loss: 0.8113 (0.8139) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +Train: [18] [ 0/6250] eta: 9:29:18 lr: 0.000119 grad: 0.1000 (0.1000) loss: 0.8468 (0.8468) time: 5.4654 data: 5.3726 max mem: 9377 +Train: [18] [ 100/6250] eta: 0:19:38 lr: 0.000119 grad: 0.0631 (0.0646) loss: 0.8387 (0.8479) time: 0.1662 data: 0.0765 max mem: 9377 +Train: [18] [ 200/6250] eta: 0:17:02 lr: 0.000119 grad: 0.0648 (0.0659) loss: 0.8375 (0.8426) time: 0.1259 data: 0.0420 max mem: 9377 +Train: [18] [ 300/6250] eta: 0:15:54 lr: 0.000119 grad: 0.0639 (0.0660) loss: 0.8381 (0.8412) time: 0.1474 data: 0.0614 max mem: 9377 +Train: [18] [ 400/6250] eta: 0:15:09 lr: 0.000119 grad: 0.0651 (0.0659) loss: 0.8310 (0.8398) time: 0.1568 data: 0.0798 max mem: 9377 +Train: [18] [ 500/6250] eta: 0:14:37 lr: 0.000119 grad: 0.0621 (0.0659) loss: 0.8405 (0.8397) time: 0.1413 data: 0.0523 max mem: 9377 +Train: [18] [ 600/6250] eta: 0:14:06 lr: 0.000119 grad: 0.0688 (0.0659) loss: 0.8349 (0.8393) time: 0.1517 data: 0.0661 max mem: 9377 +Train: [18] [ 700/6250] eta: 0:13:58 lr: 0.000119 grad: 0.0597 (0.0658) loss: 0.8366 (0.8390) time: 0.1591 data: 0.0771 max mem: 9377 +Train: [18] [ 800/6250] eta: 0:13:50 lr: 0.000119 grad: 0.0627 (0.0655) loss: 0.8392 (0.8390) time: 0.1407 data: 0.0561 max mem: 9377 +Train: [18] [ 900/6250] eta: 0:13:47 lr: 0.000119 grad: 0.0615 (0.0652) loss: 0.8428 (0.8391) time: 0.1400 data: 0.0422 max mem: 9377 +Train: [18] [1000/6250] eta: 0:13:38 lr: 0.000119 grad: 0.0645 (0.0650) loss: 0.8423 (0.8394) time: 0.1560 data: 0.0663 max mem: 9377 +Train: [18] [1100/6250] eta: 0:13:30 lr: 0.000119 grad: 0.0607 (0.0648) loss: 0.8433 (0.8398) time: 0.1482 data: 0.0621 max mem: 9377 +Train: [18] [1200/6250] eta: 0:13:15 lr: 0.000119 grad: 0.0605 (0.0646) loss: 0.8423 (0.8400) time: 0.1757 data: 0.0877 max mem: 9377 +Train: [18] [1300/6250] eta: 0:12:52 lr: 0.000119 grad: 0.0607 (0.0644) loss: 0.8443 (0.8402) time: 0.1358 data: 0.0474 max mem: 9377 +Train: [18] [1400/6250] eta: 0:12:34 lr: 0.000119 grad: 0.0623 (0.0642) loss: 0.8405 (0.8404) time: 0.1533 data: 0.0711 max mem: 9377 +Train: [18] [1500/6250] eta: 0:12:17 lr: 0.000119 grad: 0.0620 (0.0640) loss: 0.8440 (0.8406) time: 0.1893 data: 0.1078 max mem: 9377 +Train: [18] [1600/6250] eta: 0:11:58 lr: 0.000119 grad: 0.0607 (0.0639) loss: 0.8442 (0.8407) time: 0.1345 data: 0.0515 max mem: 9377 +Train: [18] [1700/6250] eta: 0:11:41 lr: 0.000119 grad: 0.0602 (0.0638) loss: 0.8441 (0.8408) time: 0.1468 data: 0.0631 max mem: 9377 +Train: [18] [1800/6250] eta: 0:11:25 lr: 0.000119 grad: 0.0619 (0.0638) loss: 0.8415 (0.8409) time: 0.1373 data: 0.0522 max mem: 9377 +Train: [18] [1900/6250] eta: 0:11:08 lr: 0.000119 grad: 0.0614 (0.0638) loss: 0.8368 (0.8409) time: 0.1369 data: 0.0530 max mem: 9377 +Train: [18] [2000/6250] eta: 0:10:52 lr: 0.000119 grad: 0.0591 (0.0638) loss: 0.8357 (0.8408) time: 0.1317 data: 0.0454 max mem: 9377 +Train: [18] [2100/6250] eta: 0:10:36 lr: 0.000119 grad: 0.0615 (0.0639) loss: 0.8414 (0.8407) time: 0.1413 data: 0.0551 max mem: 9377 +Train: [18] [2200/6250] eta: 0:10:19 lr: 0.000119 grad: 0.0657 (0.0639) loss: 0.8391 (0.8406) time: 0.1294 data: 0.0416 max mem: 9377 +Train: [18] [2300/6250] eta: 0:10:02 lr: 0.000119 grad: 0.0694 (0.0641) loss: 0.8385 (0.8405) time: 0.1445 data: 0.0613 max mem: 9377 +Train: [18] [2400/6250] eta: 0:09:46 lr: 0.000119 grad: 0.0554 (0.0641) loss: 0.8469 (0.8405) time: 0.1416 data: 0.0557 max mem: 9377 +Train: [18] [2500/6250] eta: 0:09:30 lr: 0.000119 grad: 0.0615 (0.0642) loss: 0.8400 (0.8404) time: 0.1402 data: 0.0598 max mem: 9377 +Train: [18] [2600/6250] eta: 0:09:17 lr: 0.000119 grad: 0.0609 (0.0643) loss: 0.8414 (0.8403) time: 0.1760 data: 0.0892 max mem: 9377 +Train: [18] [2700/6250] eta: 0:09:03 lr: 0.000119 grad: 0.0644 (0.0644) loss: 0.8325 (0.8402) time: 0.1603 data: 0.0735 max mem: 9377 +Train: [18] [2800/6250] eta: 0:08:49 lr: 0.000119 grad: 0.0631 (0.0644) loss: 0.8330 (0.8400) time: 0.1237 data: 0.0476 max mem: 9377 +Train: [18] [2900/6250] eta: 0:08:34 lr: 0.000119 grad: 0.0598 (0.0644) loss: 0.8371 (0.8399) time: 0.1652 data: 0.0810 max mem: 9377 +Train: [18] [3000/6250] eta: 0:08:18 lr: 0.000119 grad: 0.0609 (0.0644) loss: 0.8368 (0.8397) time: 0.1453 data: 0.0579 max mem: 9377 +Train: [18] [3100/6250] eta: 0:08:03 lr: 0.000119 grad: 0.0626 (0.0646) loss: 0.8357 (0.8396) time: 0.1560 data: 0.0740 max mem: 9377 +Train: [18] [3200/6250] eta: 0:07:47 lr: 0.000119 grad: 0.0615 (0.0647) loss: 0.8363 (0.8394) time: 0.1417 data: 0.0520 max mem: 9377 +Train: [18] [3300/6250] eta: 0:07:30 lr: 0.000119 grad: 0.0702 (0.0648) loss: 0.8393 (0.8393) time: 0.1361 data: 0.0465 max mem: 9377 +Train: [18] [3400/6250] eta: 0:07:16 lr: 0.000119 grad: 0.0650 (0.0649) loss: 0.8353 (0.8392) time: 0.1473 data: 0.0643 max mem: 9377 +Train: [18] [3500/6250] eta: 0:07:00 lr: 0.000119 grad: 0.0653 (0.0650) loss: 0.8389 (0.8392) time: 0.1846 data: 0.1057 max mem: 9377 +Train: [18] [3600/6250] eta: 0:06:44 lr: 0.000119 grad: 0.0680 (0.0650) loss: 0.8348 (0.8391) time: 0.1239 data: 0.0429 max mem: 9377 +Train: [18] [3700/6250] eta: 0:06:28 lr: 0.000119 grad: 0.0611 (0.0650) loss: 0.8363 (0.8391) time: 0.1366 data: 0.0550 max mem: 9377 +Train: [18] [3800/6250] eta: 0:06:12 lr: 0.000119 grad: 0.0615 (0.0650) loss: 0.8395 (0.8391) time: 0.1432 data: 0.0638 max mem: 9377 +Train: [18] [3900/6250] eta: 0:05:57 lr: 0.000119 grad: 0.0607 (0.0650) loss: 0.8350 (0.8390) time: 0.1371 data: 0.0554 max mem: 9377 +Train: [18] [4000/6250] eta: 0:05:41 lr: 0.000119 grad: 0.0625 (0.0650) loss: 0.8412 (0.8390) time: 0.1457 data: 0.0638 max mem: 9377 +Train: [18] [4100/6250] eta: 0:05:26 lr: 0.000119 grad: 0.0576 (0.0650) loss: 0.8411 (0.8390) time: 0.1470 data: 0.0643 max mem: 9377 +Train: [18] [4200/6250] eta: 0:05:10 lr: 0.000119 grad: 0.0607 (0.0650) loss: 0.8423 (0.8390) time: 0.1470 data: 0.0592 max mem: 9377 +Train: [18] [4300/6250] eta: 0:04:54 lr: 0.000119 grad: 0.0577 (0.0650) loss: 0.8364 (0.8390) time: 0.1485 data: 0.0671 max mem: 9377 +Train: [18] [4400/6250] eta: 0:04:39 lr: 0.000119 grad: 0.0687 (0.0650) loss: 0.8356 (0.8390) time: 0.1491 data: 0.0648 max mem: 9377 +Train: [18] [4500/6250] eta: 0:04:24 lr: 0.000119 grad: 0.0632 (0.0650) loss: 0.8456 (0.8390) time: 0.1508 data: 0.0698 max mem: 9377 +Train: [18] [4600/6250] eta: 0:04:09 lr: 0.000119 grad: 0.0616 (0.0650) loss: 0.8397 (0.8390) time: 0.1432 data: 0.0579 max mem: 9377 +Train: [18] [4700/6250] eta: 0:03:53 lr: 0.000119 grad: 0.0603 (0.0651) loss: 0.8426 (0.8390) time: 0.1503 data: 0.0650 max mem: 9377 +Train: [18] [4800/6250] eta: 0:03:38 lr: 0.000119 grad: 0.0611 (0.0651) loss: 0.8385 (0.8390) time: 0.1599 data: 0.0790 max mem: 9377 +Train: [18] [4900/6250] eta: 0:03:23 lr: 0.000119 grad: 0.0638 (0.0651) loss: 0.8423 (0.8391) time: 0.1548 data: 0.0758 max mem: 9377 +Train: [18] [5000/6250] eta: 0:03:08 lr: 0.000119 grad: 0.0635 (0.0652) loss: 0.8371 (0.8391) time: 0.1449 data: 0.0625 max mem: 9377 +Train: [18] [5100/6250] eta: 0:02:53 lr: 0.000119 grad: 0.0678 (0.0652) loss: 0.8380 (0.8391) time: 0.1600 data: 0.0814 max mem: 9377 +Train: [18] [5200/6250] eta: 0:02:37 lr: 0.000119 grad: 0.0610 (0.0652) loss: 0.8431 (0.8391) time: 0.1380 data: 0.0508 max mem: 9377 +Train: [18] [5300/6250] eta: 0:02:22 lr: 0.000119 grad: 0.0652 (0.0652) loss: 0.8453 (0.8391) time: 0.1266 data: 0.0456 max mem: 9377 +Train: [18] [5400/6250] eta: 0:02:07 lr: 0.000119 grad: 0.0636 (0.0652) loss: 0.8423 (0.8392) time: 0.1299 data: 0.0485 max mem: 9377 +Train: [18] [5500/6250] eta: 0:01:52 lr: 0.000119 grad: 0.0617 (0.0652) loss: 0.8446 (0.8392) time: 0.1352 data: 0.0561 max mem: 9377 +Train: [18] [5600/6250] eta: 0:01:37 lr: 0.000119 grad: 0.0637 (0.0652) loss: 0.8381 (0.8392) time: 0.1469 data: 0.0692 max mem: 9377 +Train: [18] [5700/6250] eta: 0:01:22 lr: 0.000119 grad: 0.0659 (0.0653) loss: 0.8400 (0.8392) time: 0.1370 data: 0.0545 max mem: 9377 +Train: [18] [5800/6250] eta: 0:01:07 lr: 0.000118 grad: 0.0623 (0.0652) loss: 0.8388 (0.8393) time: 0.1524 data: 0.0692 max mem: 9377 +Train: [18] [5900/6250] eta: 0:00:52 lr: 0.000118 grad: 0.0590 (0.0652) loss: 0.8416 (0.8393) time: 0.1391 data: 0.0526 max mem: 9377 +Train: [18] [6000/6250] eta: 0:00:37 lr: 0.000118 grad: 0.0636 (0.0651) loss: 0.8420 (0.8394) time: 0.1674 data: 0.0871 max mem: 9377 +Train: [18] [6100/6250] eta: 0:00:22 lr: 0.000118 grad: 0.0627 (0.0651) loss: 0.8429 (0.8394) time: 0.1451 data: 0.0601 max mem: 9377 +Train: [18] [6200/6250] eta: 0:00:07 lr: 0.000118 grad: 0.0609 (0.0651) loss: 0.8392 (0.8395) time: 0.1645 data: 0.0809 max mem: 9377 +Train: [18] [6249/6250] eta: 0:00:00 lr: 0.000118 grad: 0.0656 (0.0651) loss: 0.8431 (0.8395) time: 0.1689 data: 0.0896 max mem: 9377 +Train: [18] Total time: 0:15:36 (0.1499 s / it) +Averaged stats: lr: 0.000118 grad: 0.0656 (0.0651) loss: 0.8431 (0.8395) +Eval (hcp-train-subset): [18] [ 0/62] eta: 0:05:30 loss: 0.8419 (0.8419) time: 5.3309 data: 5.2984 max mem: 9377 +Eval (hcp-train-subset): [18] [61/62] eta: 0:00:00 loss: 0.8405 (0.8410) time: 0.1507 data: 0.1248 max mem: 9377 +Eval (hcp-train-subset): [18] Total time: 0:00:13 (0.2248 s / it) +Averaged stats (hcp-train-subset): loss: 0.8405 (0.8410) +Eval (hcp-val): [18] [ 0/62] eta: 0:03:17 loss: 0.8379 (0.8379) time: 3.1873 data: 3.1296 max mem: 9377 +Eval (hcp-val): [18] [61/62] eta: 0:00:00 loss: 0.8403 (0.8414) time: 0.1233 data: 0.0963 max mem: 9377 +Eval (hcp-val): [18] Total time: 0:00:12 (0.2010 s / it) +Averaged stats (hcp-val): loss: 0.8403 (0.8414) +Eval (nsd-val): [18] [ 0/62] eta: 0:05:02 loss: 0.8021 (0.8021) time: 4.8732 data: 4.8433 max mem: 9377 +Eval (nsd-val): [18] [61/62] eta: 0:00:00 loss: 0.8130 (0.8136) time: 0.1456 data: 0.1201 max mem: 9377 +Eval (nsd-val): [18] Total time: 0:00:13 (0.2119 s / it) +Averaged stats (nsd-val): loss: 0.8130 (0.8136) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +Train: [19] [ 0/6250] eta: 10:24:05 lr: 0.000118 grad: 0.0917 (0.0917) loss: 0.7746 (0.7746) time: 5.9914 data: 5.8883 max mem: 9377 +Train: [19] [ 100/6250] eta: 0:20:23 lr: 0.000118 grad: 0.0683 (0.0744) loss: 0.8465 (0.8371) time: 0.1478 data: 0.0617 max mem: 9377 +Train: [19] [ 200/6250] eta: 0:17:27 lr: 0.000118 grad: 0.0640 (0.0720) loss: 0.8332 (0.8365) time: 0.1435 data: 0.0486 max mem: 9377 +Train: [19] [ 300/6250] eta: 0:16:14 lr: 0.000118 grad: 0.0620 (0.0701) loss: 0.8373 (0.8357) time: 0.1259 data: 0.0476 max mem: 9377 +Train: [19] [ 400/6250] eta: 0:15:26 lr: 0.000118 grad: 0.0633 (0.0687) loss: 0.8363 (0.8361) time: 0.1423 data: 0.0545 max mem: 9377 +Train: [19] [ 500/6250] eta: 0:14:35 lr: 0.000118 grad: 0.0607 (0.0676) loss: 0.8404 (0.8366) time: 0.1286 data: 0.0307 max mem: 9377 +Train: [19] [ 600/6250] eta: 0:14:08 lr: 0.000118 grad: 0.0664 (0.0671) loss: 0.8427 (0.8370) time: 0.1332 data: 0.0351 max mem: 9377 +Train: [19] [ 700/6250] eta: 0:13:57 lr: 0.000118 grad: 0.0641 (0.0668) loss: 0.8409 (0.8369) time: 0.1801 data: 0.0980 max mem: 9377 +Train: [19] [ 800/6250] eta: 0:13:51 lr: 0.000118 grad: 0.0604 (0.0664) loss: 0.8371 (0.8367) time: 0.1636 data: 0.0789 max mem: 9377 +Train: [19] [ 900/6250] eta: 0:13:46 lr: 0.000118 grad: 0.0670 (0.0667) loss: 0.8377 (0.8366) time: 0.1561 data: 0.0611 max mem: 9377 +Train: [19] [1000/6250] eta: 0:13:36 lr: 0.000118 grad: 0.0603 (0.0665) loss: 0.8316 (0.8363) time: 0.1819 data: 0.1047 max mem: 9377 +Train: [19] [1100/6250] eta: 0:13:14 lr: 0.000118 grad: 0.0653 (0.0664) loss: 0.8353 (0.8362) time: 0.1446 data: 0.0635 max mem: 9377 +Train: [19] [1200/6250] eta: 0:12:56 lr: 0.000118 grad: 0.0619 (0.0663) loss: 0.8338 (0.8361) time: 0.1635 data: 0.0792 max mem: 9377 +Train: [19] [1300/6250] eta: 0:12:47 lr: 0.000118 grad: 0.0654 (0.0664) loss: 0.8350 (0.8358) time: 0.1844 data: 0.1029 max mem: 9377 +Train: [19] [1400/6250] eta: 0:12:30 lr: 0.000118 grad: 0.0642 (0.0663) loss: 0.8307 (0.8356) time: 0.1393 data: 0.0515 max mem: 9377 +Train: [19] [1500/6250] eta: 0:12:13 lr: 0.000118 grad: 0.0667 (0.0664) loss: 0.8358 (0.8355) time: 0.1504 data: 0.0667 max mem: 9377 +Train: [19] [1600/6250] eta: 0:11:57 lr: 0.000118 grad: 0.0633 (0.0663) loss: 0.8351 (0.8354) time: 0.1602 data: 0.0783 max mem: 9377 +Train: [19] [1700/6250] eta: 0:11:41 lr: 0.000118 grad: 0.0607 (0.0662) loss: 0.8363 (0.8354) time: 0.1467 data: 0.0649 max mem: 9377 +Train: [19] [1800/6250] eta: 0:11:25 lr: 0.000118 grad: 0.0625 (0.0664) loss: 0.8299 (0.8354) time: 0.1544 data: 0.0688 max mem: 9377 +Train: [19] [1900/6250] eta: 0:11:07 lr: 0.000118 grad: 0.0604 (0.0662) loss: 0.8374 (0.8355) time: 0.1363 data: 0.0467 max mem: 9377 +Train: [19] [2000/6250] eta: 0:10:51 lr: 0.000118 grad: 0.0625 (0.0661) loss: 0.8342 (0.8356) time: 0.1274 data: 0.0474 max mem: 9377 +Train: [19] [2100/6250] eta: 0:10:35 lr: 0.000118 grad: 0.0596 (0.0660) loss: 0.8412 (0.8356) time: 0.1486 data: 0.0667 max mem: 9377 +Train: [19] [2200/6250] eta: 0:10:20 lr: 0.000118 grad: 0.0640 (0.0660) loss: 0.8374 (0.8356) time: 0.1504 data: 0.0711 max mem: 9377 +Train: [19] [2300/6250] eta: 0:10:10 lr: 0.000118 grad: 0.0659 (0.0659) loss: 0.8376 (0.8356) time: 0.1885 data: 0.0940 max mem: 9377 +Train: [19] [2400/6250] eta: 0:09:58 lr: 0.000118 grad: 0.0611 (0.0659) loss: 0.8381 (0.8356) time: 0.1958 data: 0.1125 max mem: 9377 +Train: [19] [2500/6250] eta: 0:09:45 lr: 0.000118 grad: 0.0634 (0.0661) loss: 0.8372 (0.8357) time: 0.1908 data: 0.1078 max mem: 9377 +Train: [19] [2600/6250] eta: 0:09:30 lr: 0.000118 grad: 0.0593 (0.0660) loss: 0.8392 (0.8357) time: 0.1818 data: 0.1017 max mem: 9377 +Train: [19] [2700/6250] eta: 0:09:16 lr: 0.000118 grad: 0.0645 (0.0660) loss: 0.8313 (0.8357) time: 0.1741 data: 0.0903 max mem: 9377 +Train: [19] [2800/6250] eta: 0:09:01 lr: 0.000118 grad: 0.0596 (0.0659) loss: 0.8366 (0.8357) time: 0.1843 data: 0.0943 max mem: 9377 +Train: [19] [2900/6250] eta: 0:08:46 lr: 0.000118 grad: 0.0632 (0.0659) loss: 0.8322 (0.8357) time: 0.1650 data: 0.0786 max mem: 9377 +Train: [19] [3000/6250] eta: 0:08:30 lr: 0.000118 grad: 0.0670 (0.0659) loss: 0.8342 (0.8356) time: 0.1600 data: 0.0754 max mem: 9377 +Train: [19] [3100/6250] eta: 0:08:15 lr: 0.000118 grad: 0.0648 (0.0659) loss: 0.8313 (0.8356) time: 0.1646 data: 0.0831 max mem: 9377 +Train: [19] [3200/6250] eta: 0:07:59 lr: 0.000118 grad: 0.0635 (0.0660) loss: 0.8323 (0.8356) time: 0.2069 data: 0.1294 max mem: 9377 +Train: [19] [3300/6250] eta: 0:07:43 lr: 0.000118 grad: 0.0663 (0.0659) loss: 0.8346 (0.8356) time: 0.1537 data: 0.0721 max mem: 9377 +Train: [19] [3400/6250] eta: 0:07:26 lr: 0.000118 grad: 0.0634 (0.0659) loss: 0.8305 (0.8356) time: 0.1388 data: 0.0525 max mem: 9377 +Train: [19] [3500/6250] eta: 0:07:10 lr: 0.000118 grad: 0.0620 (0.0659) loss: 0.8331 (0.8356) time: 0.1459 data: 0.0662 max mem: 9377 +Train: [19] [3600/6250] eta: 0:06:53 lr: 0.000118 grad: 0.0651 (0.0659) loss: 0.8376 (0.8356) time: 0.1349 data: 0.0595 max mem: 9377 +Train: [19] [3700/6250] eta: 0:06:37 lr: 0.000118 grad: 0.0630 (0.0659) loss: 0.8363 (0.8357) time: 0.1535 data: 0.0683 max mem: 9377 +Train: [19] [3800/6250] eta: 0:06:20 lr: 0.000118 grad: 0.0629 (0.0658) loss: 0.8373 (0.8358) time: 0.1441 data: 0.0563 max mem: 9377 +Train: [19] [3900/6250] eta: 0:06:04 lr: 0.000118 grad: 0.0614 (0.0658) loss: 0.8406 (0.8358) time: 0.1251 data: 0.0337 max mem: 9377 +Train: [19] [4000/6250] eta: 0:05:48 lr: 0.000118 grad: 0.0617 (0.0657) loss: 0.8344 (0.8359) time: 0.1352 data: 0.0482 max mem: 9377 +Train: [19] [4100/6250] eta: 0:05:32 lr: 0.000118 grad: 0.0610 (0.0657) loss: 0.8395 (0.8359) time: 0.1743 data: 0.0941 max mem: 9377 +Train: [19] [4200/6250] eta: 0:05:16 lr: 0.000118 grad: 0.0628 (0.0656) loss: 0.8409 (0.8361) time: 0.1482 data: 0.0637 max mem: 9377 +Train: [19] [4300/6250] eta: 0:05:00 lr: 0.000118 grad: 0.0598 (0.0655) loss: 0.8410 (0.8362) time: 0.1319 data: 0.0413 max mem: 9377 +Train: [19] [4400/6250] eta: 0:04:44 lr: 0.000118 grad: 0.0652 (0.0655) loss: 0.8409 (0.8362) time: 0.1357 data: 0.0595 max mem: 9377 +Train: [19] [4500/6250] eta: 0:04:28 lr: 0.000118 grad: 0.0602 (0.0655) loss: 0.8388 (0.8363) time: 0.1492 data: 0.0698 max mem: 9377 +Train: [19] [4600/6250] eta: 0:04:12 lr: 0.000118 grad: 0.0637 (0.0654) loss: 0.8352 (0.8364) time: 0.1277 data: 0.0434 max mem: 9377 +Train: [19] [4700/6250] eta: 0:03:57 lr: 0.000118 grad: 0.0656 (0.0654) loss: 0.8354 (0.8364) time: 0.1496 data: 0.0654 max mem: 9377 +Train: [19] [4800/6250] eta: 0:03:41 lr: 0.000118 grad: 0.0616 (0.0654) loss: 0.8406 (0.8364) time: 0.1511 data: 0.0685 max mem: 9377 +Train: [19] [4900/6250] eta: 0:03:26 lr: 0.000118 grad: 0.0632 (0.0654) loss: 0.8410 (0.8365) time: 0.1270 data: 0.0450 max mem: 9377 +Train: [19] [5000/6250] eta: 0:03:10 lr: 0.000118 grad: 0.0604 (0.0654) loss: 0.8394 (0.8365) time: 0.1657 data: 0.0913 max mem: 9377 +Train: [19] [5100/6250] eta: 0:02:55 lr: 0.000118 grad: 0.0612 (0.0654) loss: 0.8368 (0.8366) time: 0.1470 data: 0.0719 max mem: 9377 +Train: [19] [5200/6250] eta: 0:02:39 lr: 0.000118 grad: 0.0658 (0.0653) loss: 0.8380 (0.8366) time: 0.1299 data: 0.0407 max mem: 9377 +Train: [19] [5300/6250] eta: 0:02:24 lr: 0.000118 grad: 0.0634 (0.0654) loss: 0.8330 (0.8366) time: 0.1533 data: 0.0712 max mem: 9377 +Train: [19] [5400/6250] eta: 0:02:09 lr: 0.000118 grad: 0.0625 (0.0654) loss: 0.8355 (0.8366) time: 0.1362 data: 0.0403 max mem: 9377 +Train: [19] [5500/6250] eta: 0:01:53 lr: 0.000118 grad: 0.0633 (0.0654) loss: 0.8373 (0.8366) time: 0.1572 data: 0.0770 max mem: 9377 +Train: [19] [5600/6250] eta: 0:01:38 lr: 0.000118 grad: 0.0675 (0.0654) loss: 0.8388 (0.8366) time: 0.1374 data: 0.0510 max mem: 9377 +Train: [19] [5700/6250] eta: 0:01:23 lr: 0.000118 grad: 0.0642 (0.0654) loss: 0.8356 (0.8366) time: 0.1327 data: 0.0458 max mem: 9377 +Train: [19] [5800/6250] eta: 0:01:08 lr: 0.000118 grad: 0.0631 (0.0654) loss: 0.8388 (0.8366) time: 0.1471 data: 0.0632 max mem: 9377 +Train: [19] [5900/6250] eta: 0:00:52 lr: 0.000118 grad: 0.0655 (0.0654) loss: 0.8329 (0.8366) time: 0.1369 data: 0.0590 max mem: 9377 +Train: [19] [6000/6250] eta: 0:00:37 lr: 0.000118 grad: 0.0622 (0.0655) loss: 0.8350 (0.8366) time: 0.1343 data: 0.0501 max mem: 9377 +Train: [19] [6100/6250] eta: 0:00:22 lr: 0.000117 grad: 0.0632 (0.0654) loss: 0.8387 (0.8366) time: 0.1243 data: 0.0386 max mem: 9377 +Train: [19] [6200/6250] eta: 0:00:07 lr: 0.000117 grad: 0.0614 (0.0655) loss: 0.8381 (0.8366) time: 0.1733 data: 0.0958 max mem: 9377 +Train: [19] [6249/6250] eta: 0:00:00 lr: 0.000117 grad: 0.0614 (0.0655) loss: 0.8373 (0.8366) time: 0.1472 data: 0.0672 max mem: 9377 +Train: [19] Total time: 0:15:46 (0.1514 s / it) +Averaged stats: lr: 0.000117 grad: 0.0614 (0.0655) loss: 0.8373 (0.8366) +Eval (hcp-train-subset): [19] [ 0/62] eta: 0:04:41 loss: 0.8443 (0.8443) time: 4.5459 data: 4.5083 max mem: 9377 +Eval (hcp-train-subset): [19] [61/62] eta: 0:00:00 loss: 0.8416 (0.8410) time: 0.1466 data: 0.1211 max mem: 9377 +Eval (hcp-train-subset): [19] Total time: 0:00:14 (0.2268 s / it) +Averaged stats (hcp-train-subset): loss: 0.8416 (0.8410) +Making plots (hcp-train-subset): example=12 +Eval (hcp-val): [19] [ 0/62] eta: 0:04:58 loss: 0.8379 (0.8379) time: 4.8096 data: 4.7779 max mem: 9377 +Eval (hcp-val): [19] [61/62] eta: 0:00:00 loss: 0.8414 (0.8415) time: 0.1278 data: 0.1019 max mem: 9377 +Eval (hcp-val): [19] Total time: 0:00:13 (0.2113 s / it) +Averaged stats (hcp-val): loss: 0.8414 (0.8415) +Making plots (hcp-val): example=2 +Eval (nsd-val): [19] [ 0/62] eta: 0:03:34 loss: 0.8038 (0.8038) time: 3.4518 data: 3.3991 max mem: 9377 +Eval (nsd-val): [19] [61/62] eta: 0:00:00 loss: 0.8112 (0.8118) time: 0.1337 data: 0.1065 max mem: 9377 +Eval (nsd-val): [19] Total time: 0:00:13 (0.2108 s / it) +Averaged stats (nsd-val): loss: 0.8112 (0.8118) +Making plots (nsd-val): example=55 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-00019.pth +Train: [20] [ 0/6250] eta: 9:52:30 lr: 0.000117 grad: 0.0662 (0.0662) loss: 0.8638 (0.8638) time: 5.6881 data: 5.5742 max mem: 9377 +Train: [20] [ 100/6250] eta: 0:20:19 lr: 0.000117 grad: 0.0639 (0.0702) loss: 0.8346 (0.8444) time: 0.1766 data: 0.0900 max mem: 9377 +Train: [20] [ 200/6250] eta: 0:17:12 lr: 0.000117 grad: 0.0576 (0.0714) loss: 0.8405 (0.8403) time: 0.1518 data: 0.0697 max mem: 9377 +Train: [20] [ 300/6250] eta: 0:16:15 lr: 0.000117 grad: 0.0614 (0.0703) loss: 0.8391 (0.8396) time: 0.1568 data: 0.0740 max mem: 9377 +Train: [20] [ 400/6250] eta: 0:15:25 lr: 0.000117 grad: 0.0642 (0.0688) loss: 0.8347 (0.8394) time: 0.1460 data: 0.0580 max mem: 9377 +Train: [20] [ 500/6250] eta: 0:14:44 lr: 0.000117 grad: 0.0596 (0.0676) loss: 0.8370 (0.8396) time: 0.1353 data: 0.0492 max mem: 9377 +Train: [20] [ 600/6250] eta: 0:14:12 lr: 0.000117 grad: 0.0591 (0.0675) loss: 0.8388 (0.8395) time: 0.1468 data: 0.0625 max mem: 9377 +Train: [20] [ 700/6250] eta: 0:13:50 lr: 0.000117 grad: 0.0594 (0.0667) loss: 0.8461 (0.8393) time: 0.1780 data: 0.0926 max mem: 9377 +Train: [20] [ 800/6250] eta: 0:13:29 lr: 0.000117 grad: 0.0657 (0.0666) loss: 0.8436 (0.8391) time: 0.1327 data: 0.0484 max mem: 9377 +Train: [20] [ 900/6250] eta: 0:13:15 lr: 0.000117 grad: 0.0623 (0.0666) loss: 0.8434 (0.8386) time: 0.1639 data: 0.0790 max mem: 9377 +Train: [20] [1000/6250] eta: 0:13:03 lr: 0.000117 grad: 0.0605 (0.0661) loss: 0.8398 (0.8384) time: 0.1596 data: 0.0691 max mem: 9377 +Train: [20] [1100/6250] eta: 0:12:50 lr: 0.000117 grad: 0.0604 (0.0659) loss: 0.8355 (0.8380) time: 0.1504 data: 0.0688 max mem: 9377 +Train: [20] [1200/6250] eta: 0:12:35 lr: 0.000117 grad: 0.0653 (0.0660) loss: 0.8375 (0.8377) time: 0.1705 data: 0.0832 max mem: 9377 +Train: [20] [1300/6250] eta: 0:12:19 lr: 0.000117 grad: 0.0622 (0.0660) loss: 0.8341 (0.8374) time: 0.1586 data: 0.0779 max mem: 9377 +Train: [20] [1400/6250] eta: 0:12:07 lr: 0.000117 grad: 0.0632 (0.0659) loss: 0.8348 (0.8372) time: 0.1588 data: 0.0781 max mem: 9377 +Train: [20] [1500/6250] eta: 0:11:54 lr: 0.000117 grad: 0.0642 (0.0659) loss: 0.8349 (0.8368) time: 0.1580 data: 0.0679 max mem: 9377 +Train: [20] [1600/6250] eta: 0:11:41 lr: 0.000117 grad: 0.0730 (0.0661) loss: 0.8251 (0.8366) time: 0.1494 data: 0.0608 max mem: 9377 +Train: [20] [1700/6250] eta: 0:11:26 lr: 0.000117 grad: 0.0600 (0.0661) loss: 0.8350 (0.8364) time: 0.1306 data: 0.0479 max mem: 9377 +Train: [20] [1800/6250] eta: 0:11:13 lr: 0.000117 grad: 0.0648 (0.0662) loss: 0.8305 (0.8363) time: 0.1645 data: 0.0898 max mem: 9377 +Train: [20] [1900/6250] eta: 0:10:59 lr: 0.000117 grad: 0.0605 (0.0661) loss: 0.8375 (0.8361) time: 0.1717 data: 0.0812 max mem: 9377 +Train: [20] [2000/6250] eta: 0:10:45 lr: 0.000117 grad: 0.0657 (0.0661) loss: 0.8330 (0.8360) time: 0.1710 data: 0.0914 max mem: 9377 +Train: [20] [2100/6250] eta: 0:10:33 lr: 0.000117 grad: 0.0624 (0.0661) loss: 0.8309 (0.8359) time: 0.1598 data: 0.0755 max mem: 9377 +Train: [20] [2200/6250] eta: 0:10:19 lr: 0.000117 grad: 0.0642 (0.0661) loss: 0.8348 (0.8358) time: 0.1525 data: 0.0724 max mem: 9377 +Train: [20] [2300/6250] eta: 0:10:04 lr: 0.000117 grad: 0.0629 (0.0661) loss: 0.8385 (0.8357) time: 0.1463 data: 0.0676 max mem: 9377 +Train: [20] [2400/6250] eta: 0:09:48 lr: 0.000117 grad: 0.0643 (0.0661) loss: 0.8357 (0.8357) time: 0.1396 data: 0.0538 max mem: 9377 +Train: [20] [2500/6250] eta: 0:09:32 lr: 0.000117 grad: 0.0634 (0.0661) loss: 0.8345 (0.8356) time: 0.1501 data: 0.0662 max mem: 9377 +Train: [20] [2600/6250] eta: 0:09:18 lr: 0.000117 grad: 0.0635 (0.0661) loss: 0.8400 (0.8357) time: 0.1435 data: 0.0491 max mem: 9377 +Train: [20] [2700/6250] eta: 0:09:03 lr: 0.000117 grad: 0.0618 (0.0660) loss: 0.8371 (0.8357) time: 0.1535 data: 0.0648 max mem: 9377 +Train: [20] [2800/6250] eta: 0:08:47 lr: 0.000117 grad: 0.0615 (0.0661) loss: 0.8356 (0.8358) time: 0.1468 data: 0.0594 max mem: 9377 +Train: [20] [2900/6250] eta: 0:08:31 lr: 0.000117 grad: 0.0609 (0.0660) loss: 0.8397 (0.8358) time: 0.1456 data: 0.0589 max mem: 9377 +Train: [20] [3000/6250] eta: 0:08:17 lr: 0.000117 grad: 0.0700 (0.0661) loss: 0.8400 (0.8358) time: 0.1950 data: 0.1148 max mem: 9377 +Train: [20] [3100/6250] eta: 0:08:02 lr: 0.000117 grad: 0.0657 (0.0661) loss: 0.8364 (0.8358) time: 0.1573 data: 0.0802 max mem: 9377 +Train: [20] [3200/6250] eta: 0:07:47 lr: 0.000117 grad: 0.0700 (0.0663) loss: 0.8355 (0.8358) time: 0.1750 data: 0.0912 max mem: 9377 +Train: [20] [3300/6250] eta: 0:07:31 lr: 0.000117 grad: 0.0662 (0.0663) loss: 0.8359 (0.8358) time: 0.1525 data: 0.0711 max mem: 9377 +Train: [20] [3400/6250] eta: 0:07:16 lr: 0.000117 grad: 0.0640 (0.0663) loss: 0.8331 (0.8358) time: 0.1624 data: 0.0768 max mem: 9377 +Train: [20] [3500/6250] eta: 0:07:01 lr: 0.000117 grad: 0.0651 (0.0663) loss: 0.8341 (0.8357) time: 0.1541 data: 0.0653 max mem: 9377 +Train: [20] [3600/6250] eta: 0:06:46 lr: 0.000117 grad: 0.0621 (0.0663) loss: 0.8358 (0.8358) time: 0.1529 data: 0.0615 max mem: 9377 +Train: [20] [3700/6250] eta: 0:06:30 lr: 0.000117 grad: 0.0633 (0.0664) loss: 0.8334 (0.8357) time: 0.1613 data: 0.0811 max mem: 9377 +Train: [20] [3800/6250] eta: 0:06:15 lr: 0.000117 grad: 0.0659 (0.0664) loss: 0.8297 (0.8357) time: 0.1568 data: 0.0684 max mem: 9377 +Train: [20] [3900/6250] eta: 0:05:59 lr: 0.000117 grad: 0.0634 (0.0664) loss: 0.8352 (0.8357) time: 0.1415 data: 0.0637 max mem: 9377 +Train: [20] [4000/6250] eta: 0:05:44 lr: 0.000117 grad: 0.0667 (0.0664) loss: 0.8328 (0.8357) time: 0.1544 data: 0.0733 max mem: 9377 +Train: [20] [4100/6250] eta: 0:05:29 lr: 0.000117 grad: 0.0643 (0.0665) loss: 0.8357 (0.8357) time: 0.1716 data: 0.0911 max mem: 9377 +Train: [20] [4200/6250] eta: 0:05:13 lr: 0.000117 grad: 0.0677 (0.0665) loss: 0.8351 (0.8357) time: 0.1413 data: 0.0446 max mem: 9377 +Train: [20] [4300/6250] eta: 0:04:58 lr: 0.000117 grad: 0.0647 (0.0665) loss: 0.8392 (0.8357) time: 0.1271 data: 0.0496 max mem: 9377 +Train: [20] [4400/6250] eta: 0:04:42 lr: 0.000117 grad: 0.0657 (0.0666) loss: 0.8371 (0.8357) time: 0.1364 data: 0.0490 max mem: 9377 +Train: [20] [4500/6250] eta: 0:04:27 lr: 0.000117 grad: 0.0630 (0.0666) loss: 0.8378 (0.8357) time: 0.1489 data: 0.0706 max mem: 9377 +Train: [20] [4600/6250] eta: 0:04:11 lr: 0.000117 grad: 0.0654 (0.0666) loss: 0.8309 (0.8357) time: 0.1292 data: 0.0444 max mem: 9377 +Train: [20] [4700/6250] eta: 0:03:56 lr: 0.000117 grad: 0.0647 (0.0667) loss: 0.8355 (0.8356) time: 0.1479 data: 0.0712 max mem: 9377 +Train: [20] [4800/6250] eta: 0:03:40 lr: 0.000117 grad: 0.0669 (0.0667) loss: 0.8316 (0.8356) time: 0.1491 data: 0.0714 max mem: 9377 +Train: [20] [4900/6250] eta: 0:03:25 lr: 0.000117 grad: 0.0657 (0.0666) loss: 0.8330 (0.8356) time: 0.1441 data: 0.0647 max mem: 9377 +Train: [20] [5000/6250] eta: 0:03:10 lr: 0.000117 grad: 0.0647 (0.0666) loss: 0.8307 (0.8355) time: 0.1719 data: 0.0927 max mem: 9377 +Train: [20] [5100/6250] eta: 0:02:54 lr: 0.000117 grad: 0.0629 (0.0666) loss: 0.8384 (0.8355) time: 0.1436 data: 0.0612 max mem: 9377 +Train: [20] [5200/6250] eta: 0:02:39 lr: 0.000117 grad: 0.0670 (0.0667) loss: 0.8340 (0.8355) time: 0.1587 data: 0.0820 max mem: 9377 +Train: [20] [5300/6250] eta: 0:02:24 lr: 0.000117 grad: 0.0671 (0.0667) loss: 0.8340 (0.8354) time: 0.1459 data: 0.0674 max mem: 9377 +Train: [20] [5400/6250] eta: 0:02:08 lr: 0.000117 grad: 0.0659 (0.0668) loss: 0.8397 (0.8354) time: 0.1546 data: 0.0773 max mem: 9377 +Train: [20] [5500/6250] eta: 0:01:53 lr: 0.000117 grad: 0.0648 (0.0668) loss: 0.8390 (0.8354) time: 0.1465 data: 0.0618 max mem: 9377 +Train: [20] [5600/6250] eta: 0:01:38 lr: 0.000117 grad: 0.0657 (0.0668) loss: 0.8356 (0.8354) time: 0.1441 data: 0.0568 max mem: 9377 +Train: [20] [5700/6250] eta: 0:01:23 lr: 0.000117 grad: 0.0649 (0.0668) loss: 0.8337 (0.8354) time: 0.1559 data: 0.0682 max mem: 9377 +Train: [20] [5800/6250] eta: 0:01:08 lr: 0.000117 grad: 0.0670 (0.0668) loss: 0.8379 (0.8354) time: 0.1508 data: 0.0690 max mem: 9377 +Train: [20] [5900/6250] eta: 0:00:52 lr: 0.000117 grad: 0.0645 (0.0668) loss: 0.8368 (0.8355) time: 0.1445 data: 0.0633 max mem: 9377 +Train: [20] [6000/6250] eta: 0:00:37 lr: 0.000116 grad: 0.0656 (0.0668) loss: 0.8379 (0.8355) time: 0.1475 data: 0.0657 max mem: 9377 +Train: [20] [6100/6250] eta: 0:00:22 lr: 0.000116 grad: 0.0638 (0.0668) loss: 0.8345 (0.8355) time: 0.1400 data: 0.0586 max mem: 9377 +Train: [20] [6200/6250] eta: 0:00:07 lr: 0.000116 grad: 0.0616 (0.0668) loss: 0.8363 (0.8355) time: 0.1626 data: 0.0837 max mem: 9377 +Train: [20] [6249/6250] eta: 0:00:00 lr: 0.000116 grad: 0.0621 (0.0668) loss: 0.8408 (0.8356) time: 0.1530 data: 0.0691 max mem: 9377 +Train: [20] Total time: 0:15:49 (0.1519 s / it) +Averaged stats: lr: 0.000116 grad: 0.0621 (0.0668) loss: 0.8408 (0.8356) +Eval (hcp-train-subset): [20] [ 0/62] eta: 0:05:29 loss: 0.8425 (0.8425) time: 5.3210 data: 5.2868 max mem: 9377 +Eval (hcp-train-subset): [20] [61/62] eta: 0:00:00 loss: 0.8379 (0.8405) time: 0.1393 data: 0.1134 max mem: 9377 +Eval (hcp-train-subset): [20] Total time: 0:00:14 (0.2318 s / it) +Averaged stats (hcp-train-subset): loss: 0.8379 (0.8405) +Eval (hcp-val): [20] [ 0/62] eta: 0:05:29 loss: 0.8382 (0.8382) time: 5.3094 data: 5.2799 max mem: 9377 +Eval (hcp-val): [20] [61/62] eta: 0:00:00 loss: 0.8418 (0.8418) time: 0.1247 data: 0.0994 max mem: 9377 +Eval (hcp-val): [20] Total time: 0:00:13 (0.2131 s / it) +Averaged stats (hcp-val): loss: 0.8418 (0.8418) +Eval (nsd-val): [20] [ 0/62] eta: 0:02:57 loss: 0.8009 (0.8009) time: 2.8612 data: 2.7759 max mem: 9377 +Eval (nsd-val): [20] [61/62] eta: 0:00:00 loss: 0.8137 (0.8143) time: 0.1223 data: 0.0972 max mem: 9377 +Eval (nsd-val): [20] Total time: 0:00:12 (0.2039 s / it) +Averaged stats (nsd-val): loss: 0.8137 (0.8143) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +Train: [21] [ 0/6250] eta: 7:38:37 lr: 0.000116 grad: 0.0832 (0.0832) loss: 0.8714 (0.8714) time: 4.4028 data: 4.1256 max mem: 9377 +Train: [21] [ 100/6250] eta: 0:20:08 lr: 0.000116 grad: 0.0698 (0.0765) loss: 0.8383 (0.8299) time: 0.1553 data: 0.0665 max mem: 9377 +Train: [21] [ 200/6250] eta: 0:17:46 lr: 0.000116 grad: 0.0604 (0.0732) loss: 0.8395 (0.8294) time: 0.1431 data: 0.0550 max mem: 9377 +Train: [21] [ 300/6250] eta: 0:17:00 lr: 0.000116 grad: 0.0685 (0.0721) loss: 0.8334 (0.8308) time: 0.1985 data: 0.1160 max mem: 9377 +Train: [21] [ 400/6250] eta: 0:15:55 lr: 0.000116 grad: 0.0697 (0.0715) loss: 0.8348 (0.8315) time: 0.1437 data: 0.0554 max mem: 9377 +Train: [21] [ 500/6250] eta: 0:15:09 lr: 0.000116 grad: 0.0623 (0.0712) loss: 0.8322 (0.8321) time: 0.1140 data: 0.0298 max mem: 9377 +Train: [21] [ 600/6250] eta: 0:14:36 lr: 0.000116 grad: 0.0645 (0.0712) loss: 0.8409 (0.8323) time: 0.1412 data: 0.0536 max mem: 9377 +Train: [21] [ 700/6250] eta: 0:14:22 lr: 0.000116 grad: 0.0657 (0.0707) loss: 0.8352 (0.8324) time: 0.1553 data: 0.0700 max mem: 9377 +Train: [21] [ 800/6250] eta: 0:14:10 lr: 0.000116 grad: 0.0701 (0.0704) loss: 0.8363 (0.8328) time: 0.1776 data: 0.0997 max mem: 9377 +Train: [21] [ 900/6250] eta: 0:14:02 lr: 0.000116 grad: 0.0680 (0.0699) loss: 0.8277 (0.8330) time: 0.1572 data: 0.0734 max mem: 9377 +Train: [21] [1000/6250] eta: 0:13:54 lr: 0.000116 grad: 0.0656 (0.0699) loss: 0.8366 (0.8334) time: 0.1467 data: 0.0665 max mem: 9377 +Train: [21] [1100/6250] eta: 0:13:43 lr: 0.000116 grad: 0.0670 (0.0698) loss: 0.8350 (0.8337) time: 0.1786 data: 0.0913 max mem: 9377 +Train: [21] [1200/6250] eta: 0:13:34 lr: 0.000116 grad: 0.0664 (0.0700) loss: 0.8332 (0.8337) time: 0.1606 data: 0.0778 max mem: 9377 +Train: [21] [1300/6250] eta: 0:13:23 lr: 0.000116 grad: 0.0672 (0.0701) loss: 0.8341 (0.8338) time: 0.1785 data: 0.0962 max mem: 9377 +Train: [21] [1400/6250] eta: 0:13:07 lr: 0.000116 grad: 0.0711 (0.0701) loss: 0.8359 (0.8340) time: 0.1704 data: 0.0819 max mem: 9377 +Train: [21] [1500/6250] eta: 0:12:48 lr: 0.000116 grad: 0.0615 (0.0699) loss: 0.8364 (0.8341) time: 0.1651 data: 0.0863 max mem: 9377 +Train: [21] [1600/6250] eta: 0:12:31 lr: 0.000116 grad: 0.0653 (0.0698) loss: 0.8383 (0.8342) time: 0.1681 data: 0.0865 max mem: 9377 +Train: [21] [1700/6250] eta: 0:12:19 lr: 0.000116 grad: 0.0654 (0.0697) loss: 0.8348 (0.8342) time: 0.1551 data: 0.0620 max mem: 9377 +Train: [21] [1800/6250] eta: 0:12:01 lr: 0.000116 grad: 0.0664 (0.0696) loss: 0.8380 (0.8342) time: 0.1593 data: 0.0722 max mem: 9377 +Train: [21] [1900/6250] eta: 0:11:43 lr: 0.000116 grad: 0.0664 (0.0695) loss: 0.8364 (0.8343) time: 0.1404 data: 0.0559 max mem: 9377 +Train: [21] [2000/6250] eta: 0:11:25 lr: 0.000116 grad: 0.0666 (0.0695) loss: 0.8334 (0.8344) time: 0.1566 data: 0.0713 max mem: 9377 +Train: [21] [2100/6250] eta: 0:11:06 lr: 0.000116 grad: 0.0659 (0.0694) loss: 0.8338 (0.8345) time: 0.1141 data: 0.0313 max mem: 9377 +Train: [21] [2200/6250] eta: 0:10:48 lr: 0.000116 grad: 0.0673 (0.0693) loss: 0.8378 (0.8346) time: 0.1447 data: 0.0603 max mem: 9377 +Train: [21] [2300/6250] eta: 0:10:28 lr: 0.000116 grad: 0.0635 (0.0692) loss: 0.8407 (0.8347) time: 0.1462 data: 0.0611 max mem: 9377 +Train: [21] [2400/6250] eta: 0:10:09 lr: 0.000116 grad: 0.0644 (0.0691) loss: 0.8363 (0.8348) time: 0.1241 data: 0.0270 max mem: 9377 +Train: [21] [2500/6250] eta: 0:09:50 lr: 0.000116 grad: 0.0706 (0.0691) loss: 0.8404 (0.8349) time: 0.1334 data: 0.0503 max mem: 9377 +Train: [21] [2600/6250] eta: 0:09:31 lr: 0.000116 grad: 0.0638 (0.0690) loss: 0.8400 (0.8350) time: 0.1233 data: 0.0386 max mem: 9377 +Train: [21] [2700/6250] eta: 0:09:16 lr: 0.000116 grad: 0.0625 (0.0690) loss: 0.8357 (0.8350) time: 0.1477 data: 0.0495 max mem: 9377 +Train: [21] [2800/6250] eta: 0:09:00 lr: 0.000116 grad: 0.0732 (0.0689) loss: 0.8336 (0.8350) time: 0.1547 data: 0.0700 max mem: 9377 +Train: [21] [2900/6250] eta: 0:08:43 lr: 0.000116 grad: 0.0707 (0.0689) loss: 0.8368 (0.8350) time: 0.1407 data: 0.0559 max mem: 9377 +Train: [21] [3000/6250] eta: 0:08:26 lr: 0.000116 grad: 0.0637 (0.0689) loss: 0.8408 (0.8351) time: 0.1570 data: 0.0698 max mem: 9377 +Train: [21] [3100/6250] eta: 0:08:09 lr: 0.000116 grad: 0.0659 (0.0688) loss: 0.8390 (0.8352) time: 0.1360 data: 0.0540 max mem: 9377 +Train: [21] [3200/6250] eta: 0:07:52 lr: 0.000116 grad: 0.0661 (0.0687) loss: 0.8346 (0.8352) time: 0.1269 data: 0.0389 max mem: 9377 +Train: [21] [3300/6250] eta: 0:07:35 lr: 0.000116 grad: 0.0640 (0.0686) loss: 0.8327 (0.8352) time: 0.1389 data: 0.0577 max mem: 9377 +Train: [21] [3400/6250] eta: 0:07:19 lr: 0.000116 grad: 0.0650 (0.0686) loss: 0.8374 (0.8353) time: 0.1384 data: 0.0480 max mem: 9377 +Train: [21] [3500/6250] eta: 0:07:03 lr: 0.000116 grad: 0.0685 (0.0687) loss: 0.8372 (0.8353) time: 0.1618 data: 0.0756 max mem: 9377 +Train: [21] [3600/6250] eta: 0:06:46 lr: 0.000116 grad: 0.0660 (0.0686) loss: 0.8364 (0.8354) time: 0.1468 data: 0.0642 max mem: 9377 +Train: [21] [3700/6250] eta: 0:06:30 lr: 0.000116 grad: 0.0673 (0.0686) loss: 0.8338 (0.8355) time: 0.1221 data: 0.0368 max mem: 9377 +Train: [21] [3800/6250] eta: 0:06:14 lr: 0.000116 grad: 0.0633 (0.0686) loss: 0.8421 (0.8356) time: 0.1367 data: 0.0508 max mem: 9377 +Train: [21] [3900/6250] eta: 0:05:58 lr: 0.000116 grad: 0.0663 (0.0685) loss: 0.8254 (0.8356) time: 0.1341 data: 0.0505 max mem: 9377 +Train: [21] [4000/6250] eta: 0:05:42 lr: 0.000116 grad: 0.0645 (0.0685) loss: 0.8363 (0.8356) time: 0.1282 data: 0.0462 max mem: 9377 +Train: [21] [4100/6250] eta: 0:05:26 lr: 0.000116 grad: 0.0632 (0.0684) loss: 0.8383 (0.8357) time: 0.1382 data: 0.0465 max mem: 9377 +Train: [21] [4200/6250] eta: 0:05:11 lr: 0.000116 grad: 0.0628 (0.0684) loss: 0.8387 (0.8358) time: 0.1538 data: 0.0730 max mem: 9377 +Train: [21] [4300/6250] eta: 0:04:55 lr: 0.000116 grad: 0.0649 (0.0684) loss: 0.8374 (0.8359) time: 0.1537 data: 0.0686 max mem: 9377 +Train: [21] [4400/6250] eta: 0:04:40 lr: 0.000116 grad: 0.0670 (0.0684) loss: 0.8346 (0.8359) time: 0.1482 data: 0.0672 max mem: 9377 +Train: [21] [4500/6250] eta: 0:04:24 lr: 0.000116 grad: 0.0672 (0.0684) loss: 0.8360 (0.8359) time: 0.1541 data: 0.0708 max mem: 9377 +Train: [21] [4600/6250] eta: 0:04:09 lr: 0.000116 grad: 0.0662 (0.0684) loss: 0.8368 (0.8359) time: 0.1307 data: 0.0450 max mem: 9377 +Train: [21] [4700/6250] eta: 0:03:53 lr: 0.000116 grad: 0.0700 (0.0685) loss: 0.8328 (0.8359) time: 0.1494 data: 0.0673 max mem: 9377 +Train: [21] [4800/6250] eta: 0:03:38 lr: 0.000116 grad: 0.0662 (0.0685) loss: 0.8376 (0.8359) time: 0.1418 data: 0.0603 max mem: 9377 +Train: [21] [4900/6250] eta: 0:03:23 lr: 0.000116 grad: 0.0639 (0.0684) loss: 0.8415 (0.8359) time: 0.1655 data: 0.0864 max mem: 9377 +Train: [21] [5000/6250] eta: 0:03:07 lr: 0.000116 grad: 0.0643 (0.0684) loss: 0.8366 (0.8358) time: 0.1586 data: 0.0770 max mem: 9377 +Train: [21] [5100/6250] eta: 0:02:52 lr: 0.000116 grad: 0.0679 (0.0684) loss: 0.8377 (0.8358) time: 0.1333 data: 0.0480 max mem: 9377 +Train: [21] [5200/6250] eta: 0:02:37 lr: 0.000116 grad: 0.0641 (0.0684) loss: 0.8374 (0.8358) time: 0.1414 data: 0.0547 max mem: 9377 +Train: [21] [5300/6250] eta: 0:02:22 lr: 0.000116 grad: 0.0625 (0.0684) loss: 0.8396 (0.8358) time: 0.1411 data: 0.0601 max mem: 9377 +Train: [21] [5400/6250] eta: 0:02:07 lr: 0.000116 grad: 0.0630 (0.0683) loss: 0.8379 (0.8359) time: 0.1368 data: 0.0503 max mem: 9377 +Train: [21] [5500/6250] eta: 0:01:52 lr: 0.000116 grad: 0.0652 (0.0682) loss: 0.8343 (0.8358) time: 0.1421 data: 0.0558 max mem: 9377 +Train: [21] [5600/6250] eta: 0:01:37 lr: 0.000115 grad: 0.0644 (0.0682) loss: 0.8351 (0.8358) time: 0.1379 data: 0.0576 max mem: 9377 +Train: [21] [5700/6250] eta: 0:01:22 lr: 0.000115 grad: 0.0649 (0.0682) loss: 0.8340 (0.8358) time: 0.1271 data: 0.0340 max mem: 9377 +Train: [21] [5800/6250] eta: 0:01:07 lr: 0.000115 grad: 0.0648 (0.0682) loss: 0.8321 (0.8358) time: 0.1342 data: 0.0478 max mem: 9377 +Train: [21] [5900/6250] eta: 0:00:52 lr: 0.000115 grad: 0.0654 (0.0682) loss: 0.8305 (0.8358) time: 0.1246 data: 0.0383 max mem: 9377 +Train: [21] [6000/6250] eta: 0:00:37 lr: 0.000115 grad: 0.0691 (0.0682) loss: 0.8274 (0.8357) time: 0.1583 data: 0.0776 max mem: 9377 +Train: [21] [6100/6250] eta: 0:00:22 lr: 0.000115 grad: 0.0665 (0.0682) loss: 0.8360 (0.8357) time: 0.1408 data: 0.0493 max mem: 9377 +Train: [21] [6200/6250] eta: 0:00:07 lr: 0.000115 grad: 0.0632 (0.0681) loss: 0.8348 (0.8357) time: 0.1360 data: 0.0518 max mem: 9377 +Train: [21] [6249/6250] eta: 0:00:00 lr: 0.000115 grad: 0.0657 (0.0681) loss: 0.8295 (0.8356) time: 0.1344 data: 0.0537 max mem: 9377 +Train: [21] Total time: 0:15:35 (0.1496 s / it) +Averaged stats: lr: 0.000115 grad: 0.0657 (0.0681) loss: 0.8295 (0.8356) +Eval (hcp-train-subset): [21] [ 0/62] eta: 0:06:21 loss: 0.8375 (0.8375) time: 6.1509 data: 6.1183 max mem: 9377 +Eval (hcp-train-subset): [21] [61/62] eta: 0:00:00 loss: 0.8397 (0.8402) time: 0.1326 data: 0.1071 max mem: 9377 +Eval (hcp-train-subset): [21] Total time: 0:00:14 (0.2341 s / it) +Averaged stats (hcp-train-subset): loss: 0.8397 (0.8402) +Eval (hcp-val): [21] [ 0/62] eta: 0:03:30 loss: 0.8383 (0.8383) time: 3.4010 data: 3.3248 max mem: 9377 +Eval (hcp-val): [21] [61/62] eta: 0:00:00 loss: 0.8403 (0.8411) time: 0.1329 data: 0.1077 max mem: 9377 +Eval (hcp-val): [21] Total time: 0:00:13 (0.2138 s / it) +Averaged stats (hcp-val): loss: 0.8403 (0.8411) +Eval (nsd-val): [21] [ 0/62] eta: 0:04:36 loss: 0.7995 (0.7995) time: 4.4594 data: 4.4286 max mem: 9377 +Eval (nsd-val): [21] [61/62] eta: 0:00:00 loss: 0.8115 (0.8121) time: 0.1153 data: 0.0900 max mem: 9377 +Eval (nsd-val): [21] Total time: 0:00:13 (0.2133 s / it) +Averaged stats (nsd-val): loss: 0.8115 (0.8121) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +Train: [22] [ 0/6250] eta: 7:28:29 lr: 0.000115 grad: 0.0601 (0.0601) loss: 0.8419 (0.8419) time: 4.3056 data: 4.0089 max mem: 9377 +Train: [22] [ 100/6250] eta: 0:19:51 lr: 0.000115 grad: 0.0715 (0.0731) loss: 0.8350 (0.8396) time: 0.1679 data: 0.0711 max mem: 9377 +Train: [22] [ 200/6250] eta: 0:17:14 lr: 0.000115 grad: 0.0656 (0.0728) loss: 0.8333 (0.8358) time: 0.1487 data: 0.0586 max mem: 9377 +Train: [22] [ 300/6250] eta: 0:16:13 lr: 0.000115 grad: 0.0651 (0.0714) loss: 0.8323 (0.8353) time: 0.1576 data: 0.0706 max mem: 9377 +Train: [22] [ 400/6250] eta: 0:15:17 lr: 0.000115 grad: 0.0656 (0.0707) loss: 0.8380 (0.8354) time: 0.1233 data: 0.0246 max mem: 9377 +Train: [22] [ 500/6250] eta: 0:14:45 lr: 0.000115 grad: 0.0665 (0.0701) loss: 0.8411 (0.8364) time: 0.1561 data: 0.0719 max mem: 9377 +Train: [22] [ 600/6250] eta: 0:14:17 lr: 0.000115 grad: 0.0653 (0.0700) loss: 0.8375 (0.8366) time: 0.1399 data: 0.0563 max mem: 9377 +Train: [22] [ 700/6250] eta: 0:14:00 lr: 0.000115 grad: 0.0688 (0.0700) loss: 0.8336 (0.8364) time: 0.1654 data: 0.0862 max mem: 9377 +Train: [22] [ 800/6250] eta: 0:13:43 lr: 0.000115 grad: 0.0675 (0.0698) loss: 0.8405 (0.8365) time: 0.1671 data: 0.0850 max mem: 9377 +Train: [22] [ 900/6250] eta: 0:13:30 lr: 0.000115 grad: 0.0623 (0.0695) loss: 0.8364 (0.8365) time: 0.1438 data: 0.0581 max mem: 9377 +Train: [22] [1000/6250] eta: 0:13:13 lr: 0.000115 grad: 0.0614 (0.0695) loss: 0.8335 (0.8364) time: 0.1679 data: 0.0865 max mem: 9377 +Train: [22] [1100/6250] eta: 0:12:57 lr: 0.000115 grad: 0.0628 (0.0694) loss: 0.8338 (0.8362) time: 0.1563 data: 0.0760 max mem: 9377 +Train: [22] [1200/6250] eta: 0:12:43 lr: 0.000115 grad: 0.0625 (0.0692) loss: 0.8340 (0.8362) time: 0.1649 data: 0.0881 max mem: 9377 +Train: [22] [1300/6250] eta: 0:12:35 lr: 0.000115 grad: 0.0656 (0.0691) loss: 0.8326 (0.8361) time: 0.1972 data: 0.1166 max mem: 9377 +Train: [22] [1400/6250] eta: 0:12:26 lr: 0.000115 grad: 0.0660 (0.0689) loss: 0.8379 (0.8361) time: 0.1821 data: 0.1092 max mem: 9377 +Train: [22] [1500/6250] eta: 0:12:14 lr: 0.000115 grad: 0.0696 (0.0688) loss: 0.8298 (0.8358) time: 0.1489 data: 0.0705 max mem: 9377 +Train: [22] [1600/6250] eta: 0:11:58 lr: 0.000115 grad: 0.0628 (0.0687) loss: 0.8333 (0.8357) time: 0.1626 data: 0.0737 max mem: 9377 +Train: [22] [1700/6250] eta: 0:11:43 lr: 0.000115 grad: 0.0634 (0.0686) loss: 0.8341 (0.8355) time: 0.1662 data: 0.0859 max mem: 9377 +Train: [22] [1800/6250] eta: 0:11:28 lr: 0.000115 grad: 0.0617 (0.0686) loss: 0.8380 (0.8355) time: 0.1571 data: 0.0768 max mem: 9377 +Train: [22] [1900/6250] eta: 0:11:10 lr: 0.000115 grad: 0.0621 (0.0685) loss: 0.8350 (0.8354) time: 0.1439 data: 0.0599 max mem: 9377 +Train: [22] [2000/6250] eta: 0:10:51 lr: 0.000115 grad: 0.0619 (0.0684) loss: 0.8334 (0.8354) time: 0.1319 data: 0.0417 max mem: 9377 +Train: [22] [2100/6250] eta: 0:10:34 lr: 0.000115 grad: 0.0647 (0.0683) loss: 0.8346 (0.8353) time: 0.1539 data: 0.0763 max mem: 9377 +Train: [22] [2200/6250] eta: 0:10:15 lr: 0.000115 grad: 0.0662 (0.0683) loss: 0.8371 (0.8355) time: 0.1381 data: 0.0533 max mem: 9377 +Train: [22] [2300/6250] eta: 0:09:57 lr: 0.000115 grad: 0.0671 (0.0683) loss: 0.8386 (0.8355) time: 0.1286 data: 0.0354 max mem: 9377 +Train: [22] [2400/6250] eta: 0:09:39 lr: 0.000115 grad: 0.0648 (0.0682) loss: 0.8339 (0.8355) time: 0.1555 data: 0.0743 max mem: 9377 +Train: [22] [2500/6250] eta: 0:09:26 lr: 0.000115 grad: 0.0649 (0.0681) loss: 0.8347 (0.8355) time: 0.1434 data: 0.0615 max mem: 9377 +Train: [22] [2600/6250] eta: 0:09:10 lr: 0.000115 grad: 0.0647 (0.0681) loss: 0.8341 (0.8354) time: 0.1390 data: 0.0571 max mem: 9377 +Train: [22] [2700/6250] eta: 0:08:54 lr: 0.000115 grad: 0.0713 (0.0681) loss: 0.8351 (0.8353) time: 0.1463 data: 0.0647 max mem: 9377 +Train: [22] [2800/6250] eta: 0:08:38 lr: 0.000115 grad: 0.0691 (0.0681) loss: 0.8357 (0.8353) time: 0.1349 data: 0.0508 max mem: 9377 +Train: [22] [2900/6250] eta: 0:08:22 lr: 0.000115 grad: 0.0656 (0.0680) loss: 0.8315 (0.8352) time: 0.1319 data: 0.0490 max mem: 9377 +Train: [22] [3000/6250] eta: 0:08:05 lr: 0.000115 grad: 0.0635 (0.0680) loss: 0.8405 (0.8353) time: 0.1402 data: 0.0575 max mem: 9377 +Train: [22] [3100/6250] eta: 0:07:49 lr: 0.000115 grad: 0.0628 (0.0680) loss: 0.8349 (0.8352) time: 0.1358 data: 0.0548 max mem: 9377 +Train: [22] [3200/6250] eta: 0:07:32 lr: 0.000115 grad: 0.0654 (0.0680) loss: 0.8393 (0.8352) time: 0.1348 data: 0.0516 max mem: 9377 +Train: [22] [3300/6250] eta: 0:07:16 lr: 0.000115 grad: 0.0658 (0.0680) loss: 0.8325 (0.8352) time: 0.1347 data: 0.0434 max mem: 9377 +Train: [22] [3400/6250] eta: 0:07:01 lr: 0.000115 grad: 0.0634 (0.0679) loss: 0.8390 (0.8353) time: 0.1351 data: 0.0441 max mem: 9377 +Train: [22] [3500/6250] eta: 0:06:46 lr: 0.000115 grad: 0.0691 (0.0679) loss: 0.8347 (0.8353) time: 0.1288 data: 0.0394 max mem: 9377 +Train: [22] [3600/6250] eta: 0:06:31 lr: 0.000115 grad: 0.0645 (0.0678) loss: 0.8390 (0.8354) time: 0.1374 data: 0.0505 max mem: 9377 +Train: [22] [3700/6250] eta: 0:06:16 lr: 0.000115 grad: 0.0641 (0.0678) loss: 0.8369 (0.8354) time: 0.1325 data: 0.0515 max mem: 9377 +Train: [22] [3800/6250] eta: 0:06:01 lr: 0.000115 grad: 0.0677 (0.0679) loss: 0.8392 (0.8354) time: 0.1596 data: 0.0777 max mem: 9377 +Train: [22] [3900/6250] eta: 0:05:45 lr: 0.000115 grad: 0.0658 (0.0679) loss: 0.8406 (0.8355) time: 0.1367 data: 0.0539 max mem: 9377 +Train: [22] [4000/6250] eta: 0:05:31 lr: 0.000115 grad: 0.0617 (0.0678) loss: 0.8378 (0.8356) time: 0.1592 data: 0.0714 max mem: 9377 +Train: [22] [4100/6250] eta: 0:05:15 lr: 0.000115 grad: 0.0673 (0.0679) loss: 0.8400 (0.8356) time: 0.1375 data: 0.0538 max mem: 9377 +Train: [22] [4200/6250] eta: 0:05:01 lr: 0.000115 grad: 0.0623 (0.0679) loss: 0.8394 (0.8357) time: 0.1582 data: 0.0730 max mem: 9377 +Train: [22] [4300/6250] eta: 0:04:46 lr: 0.000115 grad: 0.0655 (0.0679) loss: 0.8366 (0.8357) time: 0.1501 data: 0.0681 max mem: 9377 +Train: [22] [4400/6250] eta: 0:04:31 lr: 0.000115 grad: 0.0648 (0.0679) loss: 0.8388 (0.8358) time: 0.1604 data: 0.0799 max mem: 9377 +Train: [22] [4500/6250] eta: 0:04:16 lr: 0.000115 grad: 0.0615 (0.0679) loss: 0.8373 (0.8358) time: 0.1518 data: 0.0654 max mem: 9377 +Train: [22] [4600/6250] eta: 0:04:01 lr: 0.000115 grad: 0.0659 (0.0679) loss: 0.8355 (0.8358) time: 0.1540 data: 0.0701 max mem: 9377 +Train: [22] [4700/6250] eta: 0:03:47 lr: 0.000115 grad: 0.0625 (0.0678) loss: 0.8347 (0.8359) time: 0.1243 data: 0.0370 max mem: 9377 +Train: [22] [4800/6250] eta: 0:03:32 lr: 0.000115 grad: 0.0646 (0.0678) loss: 0.8320 (0.8359) time: 0.1539 data: 0.0709 max mem: 9377 +Train: [22] [4900/6250] eta: 0:03:17 lr: 0.000114 grad: 0.0682 (0.0678) loss: 0.8342 (0.8359) time: 0.1376 data: 0.0563 max mem: 9377 +Train: [22] [5000/6250] eta: 0:03:02 lr: 0.000114 grad: 0.0654 (0.0678) loss: 0.8348 (0.8359) time: 0.1417 data: 0.0567 max mem: 9377 +Train: [22] [5100/6250] eta: 0:02:48 lr: 0.000114 grad: 0.0639 (0.0679) loss: 0.8389 (0.8359) time: 0.1512 data: 0.0688 max mem: 9377 +Train: [22] [5200/6250] eta: 0:02:33 lr: 0.000114 grad: 0.0634 (0.0679) loss: 0.8265 (0.8359) time: 0.1462 data: 0.0615 max mem: 9377 +Train: [22] [5300/6250] eta: 0:02:18 lr: 0.000114 grad: 0.0653 (0.0679) loss: 0.8360 (0.8358) time: 0.1401 data: 0.0594 max mem: 9377 +Train: [22] [5400/6250] eta: 0:02:04 lr: 0.000114 grad: 0.0631 (0.0679) loss: 0.8340 (0.8358) time: 0.1465 data: 0.0659 max mem: 9377 +Train: [22] [5500/6250] eta: 0:01:49 lr: 0.000114 grad: 0.0660 (0.0679) loss: 0.8338 (0.8357) time: 0.1491 data: 0.0709 max mem: 9377 +Train: [22] [5600/6250] eta: 0:01:34 lr: 0.000114 grad: 0.0687 (0.0679) loss: 0.8382 (0.8357) time: 0.1417 data: 0.0641 max mem: 9377 +Train: [22] [5700/6250] eta: 0:01:20 lr: 0.000114 grad: 0.0690 (0.0679) loss: 0.8359 (0.8357) time: 0.1456 data: 0.0632 max mem: 9377 +Train: [22] [5800/6250] eta: 0:01:05 lr: 0.000114 grad: 0.0648 (0.0679) loss: 0.8329 (0.8357) time: 0.1482 data: 0.0709 max mem: 9377 +Train: [22] [5900/6250] eta: 0:00:50 lr: 0.000114 grad: 0.0671 (0.0679) loss: 0.8361 (0.8357) time: 0.1425 data: 0.0571 max mem: 9377 +Train: [22] [6000/6250] eta: 0:00:36 lr: 0.000114 grad: 0.0669 (0.0680) loss: 0.8351 (0.8356) time: 0.1343 data: 0.0401 max mem: 9377 +Train: [22] [6100/6250] eta: 0:00:21 lr: 0.000114 grad: 0.0651 (0.0680) loss: 0.8356 (0.8356) time: 0.1314 data: 0.0545 max mem: 9377 +Train: [22] [6200/6250] eta: 0:00:07 lr: 0.000114 grad: 0.0636 (0.0679) loss: 0.8397 (0.8357) time: 0.1475 data: 0.0646 max mem: 9377 +Train: [22] [6249/6250] eta: 0:00:00 lr: 0.000114 grad: 0.0621 (0.0679) loss: 0.8353 (0.8357) time: 0.1420 data: 0.0605 max mem: 9377 +Train: [22] Total time: 0:15:13 (0.1462 s / it) +Averaged stats: lr: 0.000114 grad: 0.0621 (0.0679) loss: 0.8353 (0.8357) +Eval (hcp-train-subset): [22] [ 0/62] eta: 0:05:05 loss: 0.8421 (0.8421) time: 4.9205 data: 4.8854 max mem: 9377 +Eval (hcp-train-subset): [22] [61/62] eta: 0:00:00 loss: 0.8403 (0.8402) time: 0.1282 data: 0.1015 max mem: 9377 +Eval (hcp-train-subset): [22] Total time: 0:00:14 (0.2308 s / it) +Averaged stats (hcp-train-subset): loss: 0.8403 (0.8402) +Eval (hcp-val): [22] [ 0/62] eta: 0:04:55 loss: 0.8379 (0.8379) time: 4.7680 data: 4.7383 max mem: 9377 +Eval (hcp-val): [22] [61/62] eta: 0:00:00 loss: 0.8385 (0.8404) time: 0.1324 data: 0.1070 max mem: 9377 +Eval (hcp-val): [22] Total time: 0:00:12 (0.2087 s / it) +Averaged stats (hcp-val): loss: 0.8385 (0.8404) +Eval (nsd-val): [22] [ 0/62] eta: 0:03:53 loss: 0.8051 (0.8051) time: 3.7641 data: 3.6874 max mem: 9377 +Eval (nsd-val): [22] [61/62] eta: 0:00:00 loss: 0.8110 (0.8119) time: 0.1241 data: 0.0989 max mem: 9377 +Eval (nsd-val): [22] Total time: 0:00:12 (0.2086 s / it) +Averaged stats (nsd-val): loss: 0.8110 (0.8119) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +Train: [23] [ 0/6250] eta: 8:29:19 lr: 0.000114 grad: 0.1673 (0.1673) loss: 0.8335 (0.8335) time: 4.8895 data: 4.6112 max mem: 9377 +Train: [23] [ 100/6250] eta: 0:20:22 lr: 0.000114 grad: 0.0666 (0.0774) loss: 0.8398 (0.8344) time: 0.1517 data: 0.0674 max mem: 9377 +Train: [23] [ 200/6250] eta: 0:17:59 lr: 0.000114 grad: 0.0675 (0.0750) loss: 0.8363 (0.8352) time: 0.1752 data: 0.0861 max mem: 9377 +Train: [23] [ 300/6250] eta: 0:16:51 lr: 0.000114 grad: 0.0664 (0.0719) loss: 0.8398 (0.8357) time: 0.1484 data: 0.0566 max mem: 9377 +Train: [23] [ 400/6250] eta: 0:15:56 lr: 0.000114 grad: 0.0659 (0.0708) loss: 0.8360 (0.8358) time: 0.1356 data: 0.0335 max mem: 9377 +Train: [23] [ 500/6250] eta: 0:15:16 lr: 0.000114 grad: 0.0638 (0.0698) loss: 0.8401 (0.8360) time: 0.1582 data: 0.0755 max mem: 9377 +Train: [23] [ 600/6250] eta: 0:14:45 lr: 0.000114 grad: 0.0644 (0.0692) loss: 0.8405 (0.8362) time: 0.1450 data: 0.0623 max mem: 9377 +Train: [23] [ 700/6250] eta: 0:14:12 lr: 0.000114 grad: 0.0675 (0.0689) loss: 0.8358 (0.8360) time: 0.1357 data: 0.0528 max mem: 9377 +Train: [23] [ 800/6250] eta: 0:13:50 lr: 0.000114 grad: 0.0636 (0.0687) loss: 0.8360 (0.8361) time: 0.1280 data: 0.0365 max mem: 9377 +Train: [23] [ 900/6250] eta: 0:13:39 lr: 0.000114 grad: 0.0603 (0.0686) loss: 0.8373 (0.8360) time: 0.1359 data: 0.0490 max mem: 9377 +Train: [23] [1000/6250] eta: 0:13:22 lr: 0.000114 grad: 0.0618 (0.0684) loss: 0.8378 (0.8362) time: 0.1639 data: 0.0782 max mem: 9377 +Train: [23] [1100/6250] eta: 0:13:02 lr: 0.000114 grad: 0.0646 (0.0682) loss: 0.8352 (0.8362) time: 0.1590 data: 0.0747 max mem: 9377 +Train: [23] [1200/6250] eta: 0:12:40 lr: 0.000114 grad: 0.0641 (0.0682) loss: 0.8407 (0.8361) time: 0.1231 data: 0.0373 max mem: 9377 +Train: [23] [1300/6250] eta: 0:12:37 lr: 0.000114 grad: 0.0638 (0.0682) loss: 0.8414 (0.8361) time: 0.1532 data: 0.0686 max mem: 9377 +Train: [23] [1400/6250] eta: 0:12:21 lr: 0.000114 grad: 0.0665 (0.0682) loss: 0.8306 (0.8360) time: 0.1512 data: 0.0714 max mem: 9377 +Train: [23] [1500/6250] eta: 0:12:06 lr: 0.000114 grad: 0.0645 (0.0682) loss: 0.8435 (0.8359) time: 0.1380 data: 0.0579 max mem: 9377 +Train: [23] [1600/6250] eta: 0:11:49 lr: 0.000114 grad: 0.0672 (0.0681) loss: 0.8339 (0.8359) time: 0.1450 data: 0.0617 max mem: 9377 +Train: [23] [1700/6250] eta: 0:11:32 lr: 0.000114 grad: 0.0630 (0.0683) loss: 0.8342 (0.8358) time: 0.1583 data: 0.0752 max mem: 9377 +Train: [23] [1800/6250] eta: 0:11:13 lr: 0.000114 grad: 0.0654 (0.0684) loss: 0.8345 (0.8358) time: 0.1308 data: 0.0451 max mem: 9377 +Train: [23] [1900/6250] eta: 0:10:57 lr: 0.000114 grad: 0.0666 (0.0684) loss: 0.8361 (0.8357) time: 0.1387 data: 0.0591 max mem: 9377 +Train: [23] [2000/6250] eta: 0:10:40 lr: 0.000114 grad: 0.0691 (0.0685) loss: 0.8366 (0.8355) time: 0.1415 data: 0.0593 max mem: 9377 +Train: [23] [2100/6250] eta: 0:10:21 lr: 0.000114 grad: 0.0659 (0.0686) loss: 0.8374 (0.8354) time: 0.1132 data: 0.0193 max mem: 9377 +Train: [23] [2200/6250] eta: 0:10:04 lr: 0.000114 grad: 0.0701 (0.0685) loss: 0.8318 (0.8354) time: 0.1425 data: 0.0481 max mem: 9377 +Train: [23] [2300/6250] eta: 0:09:49 lr: 0.000114 grad: 0.0660 (0.0685) loss: 0.8313 (0.8353) time: 0.1559 data: 0.0739 max mem: 9377 +Train: [23] [2400/6250] eta: 0:09:34 lr: 0.000114 grad: 0.0655 (0.0685) loss: 0.8369 (0.8353) time: 0.0981 data: 0.0134 max mem: 9377 +Train: [23] [2500/6250] eta: 0:09:17 lr: 0.000114 grad: 0.0695 (0.0686) loss: 0.8320 (0.8352) time: 0.1268 data: 0.0375 max mem: 9377 +Train: [23] [2600/6250] eta: 0:09:02 lr: 0.000114 grad: 0.0711 (0.0687) loss: 0.8345 (0.8351) time: 0.1264 data: 0.0485 max mem: 9377 +Train: [23] [2700/6250] eta: 0:08:46 lr: 0.000114 grad: 0.0691 (0.0687) loss: 0.8277 (0.8350) time: 0.1322 data: 0.0470 max mem: 9377 +Train: [23] [2800/6250] eta: 0:08:30 lr: 0.000114 grad: 0.0693 (0.0687) loss: 0.8310 (0.8349) time: 0.1436 data: 0.0608 max mem: 9377 +Train: [23] [2900/6250] eta: 0:08:14 lr: 0.000114 grad: 0.0670 (0.0688) loss: 0.8307 (0.8347) time: 0.1350 data: 0.0510 max mem: 9377 +Train: [23] [3000/6250] eta: 0:07:58 lr: 0.000114 grad: 0.0701 (0.0688) loss: 0.8338 (0.8346) time: 0.1198 data: 0.0384 max mem: 9377 +Train: [23] [3100/6250] eta: 0:07:43 lr: 0.000114 grad: 0.0668 (0.0688) loss: 0.8330 (0.8346) time: 0.1416 data: 0.0599 max mem: 9377 +Train: [23] [3200/6250] eta: 0:07:27 lr: 0.000114 grad: 0.0664 (0.0689) loss: 0.8353 (0.8346) time: 0.1259 data: 0.0388 max mem: 9377 +Train: [23] [3300/6250] eta: 0:07:11 lr: 0.000114 grad: 0.0679 (0.0689) loss: 0.8312 (0.8346) time: 0.1354 data: 0.0507 max mem: 9377 +Train: [23] [3400/6250] eta: 0:06:56 lr: 0.000114 grad: 0.0679 (0.0689) loss: 0.8340 (0.8346) time: 0.1582 data: 0.0789 max mem: 9377 +Train: [23] [3500/6250] eta: 0:06:42 lr: 0.000114 grad: 0.0641 (0.0688) loss: 0.8396 (0.8346) time: 0.1619 data: 0.0830 max mem: 9377 +Train: [23] [3600/6250] eta: 0:06:27 lr: 0.000114 grad: 0.0673 (0.0688) loss: 0.8311 (0.8346) time: 0.1719 data: 0.0922 max mem: 9377 +Train: [23] [3700/6250] eta: 0:06:12 lr: 0.000114 grad: 0.0655 (0.0688) loss: 0.8338 (0.8346) time: 0.1467 data: 0.0647 max mem: 9377 +Train: [23] [3800/6250] eta: 0:05:57 lr: 0.000114 grad: 0.0649 (0.0688) loss: 0.8379 (0.8346) time: 0.1494 data: 0.0696 max mem: 9377 +Train: [23] [3900/6250] eta: 0:05:42 lr: 0.000114 grad: 0.0608 (0.0687) loss: 0.8391 (0.8346) time: 0.1361 data: 0.0460 max mem: 9377 +Train: [23] [4000/6250] eta: 0:05:27 lr: 0.000113 grad: 0.0649 (0.0687) loss: 0.8396 (0.8347) time: 0.1484 data: 0.0647 max mem: 9377 +Train: [23] [4100/6250] eta: 0:05:13 lr: 0.000113 grad: 0.0615 (0.0686) loss: 0.8375 (0.8347) time: 0.1448 data: 0.0637 max mem: 9377 +Train: [23] [4200/6250] eta: 0:04:58 lr: 0.000113 grad: 0.0617 (0.0685) loss: 0.8384 (0.8347) time: 0.1689 data: 0.0884 max mem: 9377 +Train: [23] [4300/6250] eta: 0:04:43 lr: 0.000113 grad: 0.0658 (0.0685) loss: 0.8396 (0.8348) time: 0.1499 data: 0.0665 max mem: 9377 +Train: [23] [4400/6250] eta: 0:04:29 lr: 0.000113 grad: 0.0655 (0.0685) loss: 0.8371 (0.8348) time: 0.1406 data: 0.0553 max mem: 9377 +Train: [23] [4500/6250] eta: 0:04:14 lr: 0.000113 grad: 0.0646 (0.0685) loss: 0.8318 (0.8348) time: 0.1491 data: 0.0688 max mem: 9377 +Train: [23] [4600/6250] eta: 0:04:00 lr: 0.000113 grad: 0.0628 (0.0684) loss: 0.8393 (0.8348) time: 0.1657 data: 0.0886 max mem: 9377 +Train: [23] [4700/6250] eta: 0:03:46 lr: 0.000113 grad: 0.0634 (0.0684) loss: 0.8335 (0.8348) time: 0.1567 data: 0.0658 max mem: 9377 +Train: [23] [4800/6250] eta: 0:03:31 lr: 0.000113 grad: 0.0612 (0.0684) loss: 0.8345 (0.8348) time: 0.1615 data: 0.0827 max mem: 9377 +Train: [23] [4900/6250] eta: 0:03:16 lr: 0.000113 grad: 0.0676 (0.0684) loss: 0.8330 (0.8347) time: 0.1230 data: 0.0364 max mem: 9377 +Train: [23] [5000/6250] eta: 0:03:02 lr: 0.000113 grad: 0.0637 (0.0684) loss: 0.8364 (0.8347) time: 0.1527 data: 0.0744 max mem: 9377 +Train: [23] [5100/6250] eta: 0:02:47 lr: 0.000113 grad: 0.0660 (0.0683) loss: 0.8329 (0.8346) time: 0.1348 data: 0.0533 max mem: 9377 +Train: [23] [5200/6250] eta: 0:02:33 lr: 0.000113 grad: 0.0729 (0.0683) loss: 0.8275 (0.8346) time: 0.1440 data: 0.0586 max mem: 9377 +Train: [23] [5300/6250] eta: 0:02:18 lr: 0.000113 grad: 0.0649 (0.0683) loss: 0.8321 (0.8346) time: 0.1549 data: 0.0750 max mem: 9377 +Train: [23] [5400/6250] eta: 0:02:03 lr: 0.000113 grad: 0.0647 (0.0683) loss: 0.8372 (0.8345) time: 0.1523 data: 0.0611 max mem: 9377 +Train: [23] [5500/6250] eta: 0:01:49 lr: 0.000113 grad: 0.0630 (0.0683) loss: 0.8375 (0.8345) time: 0.1391 data: 0.0586 max mem: 9377 +Train: [23] [5600/6250] eta: 0:01:34 lr: 0.000113 grad: 0.0648 (0.0683) loss: 0.8353 (0.8345) time: 0.1262 data: 0.0438 max mem: 9377 +Train: [23] [5700/6250] eta: 0:01:19 lr: 0.000113 grad: 0.0667 (0.0683) loss: 0.8402 (0.8346) time: 0.1384 data: 0.0558 max mem: 9377 +Train: [23] [5800/6250] eta: 0:01:05 lr: 0.000113 grad: 0.0610 (0.0683) loss: 0.8358 (0.8346) time: 0.1349 data: 0.0582 max mem: 9377 +Train: [23] [5900/6250] eta: 0:00:50 lr: 0.000113 grad: 0.0650 (0.0683) loss: 0.8392 (0.8346) time: 0.1361 data: 0.0538 max mem: 9377 +Train: [23] [6000/6250] eta: 0:00:36 lr: 0.000113 grad: 0.0692 (0.0683) loss: 0.8371 (0.8346) time: 0.1591 data: 0.0802 max mem: 9377 +Train: [23] [6100/6250] eta: 0:00:21 lr: 0.000113 grad: 0.0688 (0.0684) loss: 0.8328 (0.8346) time: 0.1214 data: 0.0349 max mem: 9377 +Train: [23] [6200/6250] eta: 0:00:07 lr: 0.000113 grad: 0.0641 (0.0684) loss: 0.8347 (0.8346) time: 0.1484 data: 0.0680 max mem: 9377 +Train: [23] [6249/6250] eta: 0:00:00 lr: 0.000113 grad: 0.0659 (0.0685) loss: 0.8310 (0.8346) time: 0.1323 data: 0.0462 max mem: 9377 +Train: [23] Total time: 0:15:11 (0.1459 s / it) +Averaged stats: lr: 0.000113 grad: 0.0659 (0.0685) loss: 0.8310 (0.8346) +Eval (hcp-train-subset): [23] [ 0/62] eta: 0:04:40 loss: 0.8392 (0.8392) time: 4.5230 data: 4.4799 max mem: 9377 +Eval (hcp-train-subset): [23] [61/62] eta: 0:00:00 loss: 0.8402 (0.8406) time: 0.1359 data: 0.1110 max mem: 9377 +Eval (hcp-train-subset): [23] Total time: 0:00:14 (0.2305 s / it) +Averaged stats (hcp-train-subset): loss: 0.8402 (0.8406) +Eval (hcp-val): [23] [ 0/62] eta: 0:03:31 loss: 0.8383 (0.8383) time: 3.4043 data: 3.3448 max mem: 9377 +Eval (hcp-val): [23] [61/62] eta: 0:00:00 loss: 0.8395 (0.8409) time: 0.1292 data: 0.1040 max mem: 9377 +Eval (hcp-val): [23] Total time: 0:00:12 (0.2050 s / it) +Averaged stats (hcp-val): loss: 0.8395 (0.8409) +Eval (nsd-val): [23] [ 0/62] eta: 0:03:35 loss: 0.8029 (0.8029) time: 3.4836 data: 3.3970 max mem: 9377 +Eval (nsd-val): [23] [61/62] eta: 0:00:00 loss: 0.8104 (0.8129) time: 0.1157 data: 0.0890 max mem: 9377 +Eval (nsd-val): [23] Total time: 0:00:12 (0.2087 s / it) +Averaged stats (nsd-val): loss: 0.8104 (0.8129) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +Train: [24] [ 0/6250] eta: 10:06:41 lr: 0.000113 grad: 0.0586 (0.0586) loss: 0.8411 (0.8411) time: 5.8242 data: 5.6963 max mem: 9377 +Train: [24] [ 100/6250] eta: 0:20:40 lr: 0.000113 grad: 0.0661 (0.0720) loss: 0.8350 (0.8385) time: 0.1496 data: 0.0523 max mem: 9377 +Train: [24] [ 200/6250] eta: 0:17:44 lr: 0.000113 grad: 0.0684 (0.0720) loss: 0.8343 (0.8370) time: 0.1474 data: 0.0589 max mem: 9377 +Train: [24] [ 300/6250] eta: 0:16:37 lr: 0.000113 grad: 0.0615 (0.0704) loss: 0.8404 (0.8368) time: 0.1693 data: 0.0820 max mem: 9377 +Train: [24] [ 400/6250] eta: 0:15:48 lr: 0.000113 grad: 0.0600 (0.0698) loss: 0.8450 (0.8365) time: 0.1426 data: 0.0562 max mem: 9377 +Train: [24] [ 500/6250] eta: 0:15:21 lr: 0.000113 grad: 0.0605 (0.0685) loss: 0.8391 (0.8373) time: 0.1785 data: 0.0935 max mem: 9377 +Train: [24] [ 600/6250] eta: 0:14:45 lr: 0.000113 grad: 0.0622 (0.0678) loss: 0.8414 (0.8379) time: 0.1575 data: 0.0711 max mem: 9377 +Train: [24] [ 700/6250] eta: 0:14:21 lr: 0.000113 grad: 0.0662 (0.0673) loss: 0.8429 (0.8378) time: 0.1556 data: 0.0703 max mem: 9377 +Train: [24] [ 800/6250] eta: 0:14:00 lr: 0.000113 grad: 0.0710 (0.0673) loss: 0.8369 (0.8374) time: 0.1565 data: 0.0751 max mem: 9377 +Train: [24] [ 900/6250] eta: 0:13:40 lr: 0.000113 grad: 0.0622 (0.0671) loss: 0.8386 (0.8373) time: 0.1442 data: 0.0575 max mem: 9377 +Train: [24] [1000/6250] eta: 0:13:19 lr: 0.000113 grad: 0.0650 (0.0674) loss: 0.8357 (0.8372) time: 0.1331 data: 0.0458 max mem: 9377 +Train: [24] [1100/6250] eta: 0:13:17 lr: 0.000113 grad: 0.0636 (0.0674) loss: 0.8389 (0.8368) time: 0.1671 data: 0.0864 max mem: 9377 +Train: [24] [1200/6250] eta: 0:13:03 lr: 0.000113 grad: 0.0686 (0.0674) loss: 0.8329 (0.8367) time: 0.1385 data: 0.0530 max mem: 9377 +Train: [24] [1300/6250] eta: 0:12:47 lr: 0.000113 grad: 0.0643 (0.0675) loss: 0.8346 (0.8363) time: 0.1547 data: 0.0758 max mem: 9377 +Train: [24] [1400/6250] eta: 0:12:27 lr: 0.000113 grad: 0.0650 (0.0676) loss: 0.8334 (0.8360) time: 0.1505 data: 0.0712 max mem: 9377 +Train: [24] [1500/6250] eta: 0:12:09 lr: 0.000113 grad: 0.0658 (0.0678) loss: 0.8306 (0.8358) time: 0.1466 data: 0.0674 max mem: 9377 +Train: [24] [1600/6250] eta: 0:11:52 lr: 0.000113 grad: 0.0650 (0.0677) loss: 0.8333 (0.8356) time: 0.1734 data: 0.0947 max mem: 9377 +Train: [24] [1700/6250] eta: 0:11:35 lr: 0.000113 grad: 0.0664 (0.0677) loss: 0.8324 (0.8355) time: 0.1508 data: 0.0663 max mem: 9377 +Train: [24] [1800/6250] eta: 0:11:18 lr: 0.000113 grad: 0.0676 (0.0678) loss: 0.8303 (0.8352) time: 0.1360 data: 0.0503 max mem: 9377 +Train: [24] [1900/6250] eta: 0:11:00 lr: 0.000113 grad: 0.0680 (0.0679) loss: 0.8290 (0.8351) time: 0.1364 data: 0.0478 max mem: 9377 +Train: [24] [2000/6250] eta: 0:10:42 lr: 0.000113 grad: 0.0655 (0.0679) loss: 0.8388 (0.8350) time: 0.1234 data: 0.0302 max mem: 9377 +Train: [24] [2100/6250] eta: 0:10:24 lr: 0.000113 grad: 0.0674 (0.0679) loss: 0.8341 (0.8348) time: 0.1313 data: 0.0425 max mem: 9377 +Train: [24] [2200/6250] eta: 0:10:09 lr: 0.000113 grad: 0.0713 (0.0682) loss: 0.8218 (0.8346) time: 0.1400 data: 0.0574 max mem: 9377 +Train: [24] [2300/6250] eta: 0:09:54 lr: 0.000113 grad: 0.0637 (0.0682) loss: 0.8292 (0.8344) time: 0.1428 data: 0.0645 max mem: 9377 +Train: [24] [2400/6250] eta: 0:09:37 lr: 0.000113 grad: 0.0711 (0.0683) loss: 0.8303 (0.8343) time: 0.1204 data: 0.0471 max mem: 9377 +Train: [24] [2500/6250] eta: 0:09:19 lr: 0.000113 grad: 0.0658 (0.0683) loss: 0.8252 (0.8341) time: 0.1244 data: 0.0439 max mem: 9377 +Train: [24] [2600/6250] eta: 0:09:03 lr: 0.000113 grad: 0.0707 (0.0684) loss: 0.8253 (0.8340) time: 0.1642 data: 0.0692 max mem: 9377 +Train: [24] [2700/6250] eta: 0:08:48 lr: 0.000113 grad: 0.0654 (0.0685) loss: 0.8394 (0.8340) time: 0.1350 data: 0.0479 max mem: 9377 +Train: [24] [2800/6250] eta: 0:08:33 lr: 0.000113 grad: 0.0650 (0.0685) loss: 0.8356 (0.8340) time: 0.1444 data: 0.0510 max mem: 9377 +Train: [24] [2900/6250] eta: 0:08:17 lr: 0.000112 grad: 0.0715 (0.0686) loss: 0.8361 (0.8341) time: 0.1472 data: 0.0636 max mem: 9377 +Train: [24] [3000/6250] eta: 0:08:01 lr: 0.000112 grad: 0.0783 (0.0687) loss: 0.8253 (0.8340) time: 0.1327 data: 0.0456 max mem: 9377 +Train: [24] [3100/6250] eta: 0:07:44 lr: 0.000112 grad: 0.0626 (0.0687) loss: 0.8355 (0.8340) time: 0.1386 data: 0.0547 max mem: 9377 +Train: [24] [3200/6250] eta: 0:07:29 lr: 0.000112 grad: 0.0653 (0.0688) loss: 0.8382 (0.8340) time: 0.1440 data: 0.0611 max mem: 9377 +Train: [24] [3300/6250] eta: 0:07:12 lr: 0.000112 grad: 0.0654 (0.0687) loss: 0.8329 (0.8340) time: 0.1341 data: 0.0452 max mem: 9377 +Train: [24] [3400/6250] eta: 0:06:57 lr: 0.000112 grad: 0.0683 (0.0688) loss: 0.8285 (0.8339) time: 0.1491 data: 0.0608 max mem: 9377 +Train: [24] [3500/6250] eta: 0:06:42 lr: 0.000112 grad: 0.0645 (0.0688) loss: 0.8361 (0.8340) time: 0.1310 data: 0.0462 max mem: 9377 +Train: [24] [3600/6250] eta: 0:06:27 lr: 0.000112 grad: 0.0686 (0.0688) loss: 0.8373 (0.8339) time: 0.1602 data: 0.0788 max mem: 9377 +Train: [24] [3700/6250] eta: 0:06:12 lr: 0.000112 grad: 0.0637 (0.0689) loss: 0.8367 (0.8339) time: 0.1314 data: 0.0523 max mem: 9377 +Train: [24] [3800/6250] eta: 0:05:57 lr: 0.000112 grad: 0.0661 (0.0690) loss: 0.8359 (0.8339) time: 0.1386 data: 0.0522 max mem: 9377 +Train: [24] [3900/6250] eta: 0:05:43 lr: 0.000112 grad: 0.0644 (0.0689) loss: 0.8343 (0.8339) time: 0.1578 data: 0.0704 max mem: 9377 +Train: [24] [4000/6250] eta: 0:05:28 lr: 0.000112 grad: 0.0686 (0.0690) loss: 0.8333 (0.8339) time: 0.1346 data: 0.0509 max mem: 9377 +Train: [24] [4100/6250] eta: 0:05:13 lr: 0.000112 grad: 0.0711 (0.0690) loss: 0.8220 (0.8338) time: 0.1565 data: 0.0784 max mem: 9377 +Train: [24] [4200/6250] eta: 0:04:58 lr: 0.000112 grad: 0.0696 (0.0689) loss: 0.8355 (0.8338) time: 0.1359 data: 0.0472 max mem: 9377 +Train: [24] [4300/6250] eta: 0:04:43 lr: 0.000112 grad: 0.0685 (0.0690) loss: 0.8306 (0.8337) time: 0.1338 data: 0.0491 max mem: 9377 +Train: [24] [4400/6250] eta: 0:04:29 lr: 0.000112 grad: 0.0638 (0.0690) loss: 0.8345 (0.8337) time: 0.1470 data: 0.0674 max mem: 9377 +Train: [24] [4500/6250] eta: 0:04:14 lr: 0.000112 grad: 0.0640 (0.0690) loss: 0.8360 (0.8337) time: 0.1586 data: 0.0830 max mem: 9377 +Train: [24] [4600/6250] eta: 0:04:00 lr: 0.000112 grad: 0.0633 (0.0691) loss: 0.8283 (0.8336) time: 0.1455 data: 0.0617 max mem: 9377 +Train: [24] [4700/6250] eta: 0:03:46 lr: 0.000112 grad: 0.0667 (0.0691) loss: 0.8361 (0.8335) time: 0.1794 data: 0.0954 max mem: 9377 +Train: [24] [4800/6250] eta: 0:03:31 lr: 0.000112 grad: 0.0681 (0.0692) loss: 0.8296 (0.8334) time: 0.1753 data: 0.0916 max mem: 9377 +Train: [24] [4900/6250] eta: 0:03:17 lr: 0.000112 grad: 0.0704 (0.0692) loss: 0.8245 (0.8334) time: 0.1522 data: 0.0737 max mem: 9377 +Train: [24] [5000/6250] eta: 0:03:02 lr: 0.000112 grad: 0.0716 (0.0692) loss: 0.8296 (0.8333) time: 0.1525 data: 0.0661 max mem: 9377 +Train: [24] [5100/6250] eta: 0:02:48 lr: 0.000112 grad: 0.0666 (0.0693) loss: 0.8334 (0.8333) time: 0.1473 data: 0.0671 max mem: 9377 +Train: [24] [5200/6250] eta: 0:02:33 lr: 0.000112 grad: 0.0670 (0.0693) loss: 0.8353 (0.8333) time: 0.1121 data: 0.0310 max mem: 9377 +Train: [24] [5300/6250] eta: 0:02:18 lr: 0.000112 grad: 0.0663 (0.0693) loss: 0.8305 (0.8332) time: 0.1550 data: 0.0682 max mem: 9377 +Train: [24] [5400/6250] eta: 0:02:03 lr: 0.000112 grad: 0.0657 (0.0693) loss: 0.8368 (0.8331) time: 0.1540 data: 0.0779 max mem: 9377 +Train: [24] [5500/6250] eta: 0:01:49 lr: 0.000112 grad: 0.0639 (0.0693) loss: 0.8334 (0.8331) time: 0.1489 data: 0.0640 max mem: 9377 +Train: [24] [5600/6250] eta: 0:01:34 lr: 0.000112 grad: 0.0686 (0.0693) loss: 0.8325 (0.8331) time: 0.1396 data: 0.0583 max mem: 9377 +Train: [24] [5700/6250] eta: 0:01:20 lr: 0.000112 grad: 0.0668 (0.0693) loss: 0.8227 (0.8330) time: 0.1287 data: 0.0439 max mem: 9377 +Train: [24] [5800/6250] eta: 0:01:05 lr: 0.000112 grad: 0.0699 (0.0692) loss: 0.8325 (0.8330) time: 0.1459 data: 0.0638 max mem: 9377 +Train: [24] [5900/6250] eta: 0:00:51 lr: 0.000112 grad: 0.0623 (0.0693) loss: 0.8305 (0.8330) time: 0.1497 data: 0.0648 max mem: 9377 +Train: [24] [6000/6250] eta: 0:00:36 lr: 0.000112 grad: 0.0669 (0.0692) loss: 0.8340 (0.8330) time: 0.1391 data: 0.0564 max mem: 9377 +Train: [24] [6100/6250] eta: 0:00:21 lr: 0.000112 grad: 0.0706 (0.0692) loss: 0.8267 (0.8330) time: 0.1282 data: 0.0362 max mem: 9377 +Train: [24] [6200/6250] eta: 0:00:07 lr: 0.000112 grad: 0.0672 (0.0692) loss: 0.8344 (0.8330) time: 0.1489 data: 0.0649 max mem: 9377 +Train: [24] [6249/6250] eta: 0:00:00 lr: 0.000112 grad: 0.0688 (0.0692) loss: 0.8331 (0.8330) time: 0.1457 data: 0.0687 max mem: 9377 +Train: [24] Total time: 0:15:14 (0.1464 s / it) +Averaged stats: lr: 0.000112 grad: 0.0688 (0.0692) loss: 0.8331 (0.8330) +Eval (hcp-train-subset): [24] [ 0/62] eta: 0:05:42 loss: 0.8389 (0.8389) time: 5.5167 data: 5.4853 max mem: 9377 +Eval (hcp-train-subset): [24] [61/62] eta: 0:00:00 loss: 0.8380 (0.8390) time: 0.1273 data: 0.1021 max mem: 9377 +Eval (hcp-train-subset): [24] Total time: 0:00:14 (0.2362 s / it) +Averaged stats (hcp-train-subset): loss: 0.8380 (0.8390) +Making plots (hcp-train-subset): example=29 +Eval (hcp-val): [24] [ 0/62] eta: 0:05:47 loss: 0.8330 (0.8330) time: 5.6127 data: 5.5818 max mem: 9377 +Eval (hcp-val): [24] [61/62] eta: 0:00:00 loss: 0.8391 (0.8399) time: 0.1397 data: 0.1141 max mem: 9377 +Eval (hcp-val): [24] Total time: 0:00:15 (0.2483 s / it) +Averaged stats (hcp-val): loss: 0.8391 (0.8399) +Making plots (hcp-val): example=27 +Eval (nsd-val): [24] [ 0/62] eta: 0:04:54 loss: 0.8022 (0.8022) time: 4.7493 data: 4.7190 max mem: 9377 +Eval (nsd-val): [24] [61/62] eta: 0:00:00 loss: 0.8127 (0.8122) time: 0.1353 data: 0.1098 max mem: 9377 +Eval (nsd-val): [24] Total time: 0:00:13 (0.2181 s / it) +Averaged stats (nsd-val): loss: 0.8127 (0.8122) +Making plots (nsd-val): example=33 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-00024.pth +Train: [25] [ 0/6250] eta: 7:40:04 lr: 0.000112 grad: 0.0528 (0.0528) loss: 0.8624 (0.8624) time: 4.4168 data: 4.2163 max mem: 9377 +Train: [25] [ 100/6250] eta: 0:20:34 lr: 0.000112 grad: 0.0696 (0.0716) loss: 0.8408 (0.8442) time: 0.1621 data: 0.0721 max mem: 9377 +Train: [25] [ 200/6250] eta: 0:18:00 lr: 0.000112 grad: 0.0620 (0.0691) loss: 0.8384 (0.8420) time: 0.1881 data: 0.0985 max mem: 9377 +Train: [25] [ 300/6250] eta: 0:16:44 lr: 0.000112 grad: 0.0665 (0.0677) loss: 0.8404 (0.8415) time: 0.1569 data: 0.0637 max mem: 9377 +Train: [25] [ 400/6250] eta: 0:15:53 lr: 0.000112 grad: 0.0627 (0.0677) loss: 0.8331 (0.8400) time: 0.1406 data: 0.0414 max mem: 9377 +Train: [25] [ 500/6250] eta: 0:15:15 lr: 0.000112 grad: 0.0640 (0.0673) loss: 0.8361 (0.8395) time: 0.1556 data: 0.0607 max mem: 9377 +Train: [25] [ 600/6250] eta: 0:14:52 lr: 0.000112 grad: 0.0650 (0.0670) loss: 0.8366 (0.8389) time: 0.1452 data: 0.0597 max mem: 9377 +Train: [25] [ 700/6250] eta: 0:14:23 lr: 0.000112 grad: 0.0659 (0.0670) loss: 0.8326 (0.8382) time: 0.1588 data: 0.0706 max mem: 9377 +Train: [25] [ 800/6250] eta: 0:13:57 lr: 0.000112 grad: 0.0657 (0.0670) loss: 0.8360 (0.8377) time: 0.1528 data: 0.0659 max mem: 9377 +Train: [25] [ 900/6250] eta: 0:13:47 lr: 0.000112 grad: 0.0688 (0.0669) loss: 0.8339 (0.8370) time: 0.1483 data: 0.0662 max mem: 9377 +Train: [25] [1000/6250] eta: 0:13:26 lr: 0.000112 grad: 0.0644 (0.0669) loss: 0.8319 (0.8366) time: 0.1230 data: 0.0484 max mem: 9377 +Train: [25] [1100/6250] eta: 0:13:06 lr: 0.000112 grad: 0.0646 (0.0669) loss: 0.8352 (0.8362) time: 0.1405 data: 0.0634 max mem: 9377 +Train: [25] [1200/6250] eta: 0:12:55 lr: 0.000112 grad: 0.0625 (0.0669) loss: 0.8343 (0.8359) time: 0.1630 data: 0.0774 max mem: 9377 +Train: [25] [1300/6250] eta: 0:12:40 lr: 0.000112 grad: 0.0658 (0.0669) loss: 0.8301 (0.8357) time: 0.1418 data: 0.0561 max mem: 9377 +Train: [25] [1400/6250] eta: 0:12:21 lr: 0.000112 grad: 0.0600 (0.0668) loss: 0.8363 (0.8356) time: 0.1174 data: 0.0308 max mem: 9377 +Train: [25] [1500/6250] eta: 0:12:02 lr: 0.000112 grad: 0.0681 (0.0670) loss: 0.8303 (0.8353) time: 0.1340 data: 0.0550 max mem: 9377 +Train: [25] [1600/6250] eta: 0:11:45 lr: 0.000111 grad: 0.0676 (0.0670) loss: 0.8292 (0.8350) time: 0.1344 data: 0.0498 max mem: 9377 +Train: [25] [1700/6250] eta: 0:11:27 lr: 0.000111 grad: 0.0658 (0.0672) loss: 0.8313 (0.8346) time: 0.1304 data: 0.0347 max mem: 9377 +Train: [25] [1800/6250] eta: 0:11:08 lr: 0.000111 grad: 0.0654 (0.0674) loss: 0.8344 (0.8345) time: 0.1163 data: 0.0341 max mem: 9377 +Train: [25] [1900/6250] eta: 0:10:50 lr: 0.000111 grad: 0.0690 (0.0676) loss: 0.8310 (0.8343) time: 0.1376 data: 0.0573 max mem: 9377 +Train: [25] [2000/6250] eta: 0:10:38 lr: 0.000111 grad: 0.0680 (0.0677) loss: 0.8325 (0.8343) time: 0.1752 data: 0.0862 max mem: 9377 +Train: [25] [2100/6250] eta: 0:10:27 lr: 0.000111 grad: 0.0669 (0.0678) loss: 0.8285 (0.8341) time: 0.1765 data: 0.0907 max mem: 9377 +Train: [25] [2200/6250] eta: 0:10:13 lr: 0.000111 grad: 0.0718 (0.0679) loss: 0.8260 (0.8339) time: 0.1708 data: 0.0845 max mem: 9377 +Train: [25] [2300/6250] eta: 0:09:56 lr: 0.000111 grad: 0.0679 (0.0680) loss: 0.8370 (0.8337) time: 0.1330 data: 0.0526 max mem: 9377 +Train: [25] [2400/6250] eta: 0:09:40 lr: 0.000111 grad: 0.0684 (0.0681) loss: 0.8314 (0.8336) time: 0.1322 data: 0.0452 max mem: 9377 +Train: [25] [2500/6250] eta: 0:09:24 lr: 0.000111 grad: 0.0655 (0.0682) loss: 0.8322 (0.8335) time: 0.1375 data: 0.0502 max mem: 9377 +Train: [25] [2600/6250] eta: 0:09:07 lr: 0.000111 grad: 0.0699 (0.0683) loss: 0.8265 (0.8334) time: 0.1354 data: 0.0601 max mem: 9377 +Train: [25] [2700/6250] eta: 0:08:51 lr: 0.000111 grad: 0.0681 (0.0685) loss: 0.8281 (0.8333) time: 0.1390 data: 0.0582 max mem: 9377 +Train: [25] [2800/6250] eta: 0:08:34 lr: 0.000111 grad: 0.0676 (0.0685) loss: 0.8305 (0.8332) time: 0.1385 data: 0.0537 max mem: 9377 +Train: [25] [2900/6250] eta: 0:08:19 lr: 0.000111 grad: 0.0700 (0.0686) loss: 0.8306 (0.8331) time: 0.1872 data: 0.1088 max mem: 9377 +Train: [25] [3000/6250] eta: 0:08:04 lr: 0.000111 grad: 0.0653 (0.0686) loss: 0.8360 (0.8331) time: 0.1410 data: 0.0523 max mem: 9377 +Train: [25] [3100/6250] eta: 0:07:50 lr: 0.000111 grad: 0.0724 (0.0687) loss: 0.8300 (0.8331) time: 0.1614 data: 0.0727 max mem: 9377 +Train: [25] [3200/6250] eta: 0:07:36 lr: 0.000111 grad: 0.0654 (0.0687) loss: 0.8373 (0.8331) time: 0.1617 data: 0.0809 max mem: 9377 +Train: [25] [3300/6250] eta: 0:07:22 lr: 0.000111 grad: 0.0668 (0.0688) loss: 0.8269 (0.8330) time: 0.1856 data: 0.0997 max mem: 9377 +Train: [25] [3400/6250] eta: 0:07:09 lr: 0.000111 grad: 0.0681 (0.0689) loss: 0.8298 (0.8329) time: 0.1641 data: 0.0801 max mem: 9377 +Train: [25] [3500/6250] eta: 0:06:56 lr: 0.000111 grad: 0.0641 (0.0690) loss: 0.8337 (0.8328) time: 0.1789 data: 0.0992 max mem: 9377 +Train: [25] [3600/6250] eta: 0:06:41 lr: 0.000111 grad: 0.0686 (0.0690) loss: 0.8324 (0.8328) time: 0.1485 data: 0.0682 max mem: 9377 +Train: [25] [3700/6250] eta: 0:06:27 lr: 0.000111 grad: 0.0712 (0.0692) loss: 0.8259 (0.8328) time: 0.1740 data: 0.0970 max mem: 9377 +Train: [25] [3800/6250] eta: 0:06:13 lr: 0.000111 grad: 0.0630 (0.0692) loss: 0.8339 (0.8327) time: 0.2255 data: 0.1448 max mem: 9377 +Train: [25] [3900/6250] eta: 0:05:56 lr: 0.000111 grad: 0.0661 (0.0694) loss: 0.8281 (0.8326) time: 0.1542 data: 0.0758 max mem: 9377 +Train: [25] [4000/6250] eta: 0:05:42 lr: 0.000111 grad: 0.0618 (0.0694) loss: 0.8338 (0.8326) time: 0.1714 data: 0.0921 max mem: 9377 +Train: [25] [4100/6250] eta: 0:05:26 lr: 0.000111 grad: 0.0774 (0.0696) loss: 0.8288 (0.8325) time: 0.1384 data: 0.0574 max mem: 9377 +Train: [25] [4200/6250] eta: 0:05:10 lr: 0.000111 grad: 0.0714 (0.0697) loss: 0.8311 (0.8324) time: 0.1450 data: 0.0642 max mem: 9377 +Train: [25] [4300/6250] eta: 0:04:54 lr: 0.000111 grad: 0.0719 (0.0697) loss: 0.8281 (0.8323) time: 0.1469 data: 0.0609 max mem: 9377 +Train: [25] [4400/6250] eta: 0:04:39 lr: 0.000111 grad: 0.0672 (0.0698) loss: 0.8308 (0.8322) time: 0.1812 data: 0.0994 max mem: 9377 +Train: [25] [4500/6250] eta: 0:04:24 lr: 0.000111 grad: 0.0728 (0.0699) loss: 0.8280 (0.8321) time: 0.1400 data: 0.0598 max mem: 9377 +Train: [25] [4600/6250] eta: 0:04:08 lr: 0.000111 grad: 0.0669 (0.0699) loss: 0.8372 (0.8321) time: 0.1273 data: 0.0404 max mem: 9377 +Train: [25] [4700/6250] eta: 0:03:53 lr: 0.000111 grad: 0.0681 (0.0699) loss: 0.8351 (0.8321) time: 0.1516 data: 0.0718 max mem: 9377 +Train: [25] [4800/6250] eta: 0:03:38 lr: 0.000111 grad: 0.0724 (0.0700) loss: 0.8353 (0.8321) time: 0.1721 data: 0.1002 max mem: 9377 +Train: [25] [4900/6250] eta: 0:03:23 lr: 0.000111 grad: 0.0691 (0.0700) loss: 0.8318 (0.8321) time: 0.1362 data: 0.0585 max mem: 9377 +Train: [25] [5000/6250] eta: 0:03:08 lr: 0.000111 grad: 0.0713 (0.0700) loss: 0.8328 (0.8321) time: 0.1714 data: 0.0786 max mem: 9377 +Train: [25] [5100/6250] eta: 0:02:53 lr: 0.000111 grad: 0.0689 (0.0700) loss: 0.8320 (0.8320) time: 0.1533 data: 0.0771 max mem: 9377 +Train: [25] [5200/6250] eta: 0:02:38 lr: 0.000111 grad: 0.0666 (0.0700) loss: 0.8366 (0.8321) time: 0.1363 data: 0.0578 max mem: 9377 +Train: [25] [5300/6250] eta: 0:02:23 lr: 0.000111 grad: 0.0654 (0.0700) loss: 0.8320 (0.8321) time: 0.1721 data: 0.0945 max mem: 9377 +Train: [25] [5400/6250] eta: 0:02:08 lr: 0.000111 grad: 0.0661 (0.0700) loss: 0.8321 (0.8321) time: 0.1746 data: 0.0978 max mem: 9377 +Train: [25] [5500/6250] eta: 0:01:53 lr: 0.000111 grad: 0.0617 (0.0699) loss: 0.8307 (0.8321) time: 0.1453 data: 0.0610 max mem: 9377 +Train: [25] [5600/6250] eta: 0:01:38 lr: 0.000111 grad: 0.0666 (0.0699) loss: 0.8364 (0.8322) time: 0.1497 data: 0.0724 max mem: 9377 +Train: [25] [5700/6250] eta: 0:01:23 lr: 0.000111 grad: 0.0673 (0.0698) loss: 0.8343 (0.8323) time: 0.1333 data: 0.0472 max mem: 9377 +Train: [25] [5800/6250] eta: 0:01:07 lr: 0.000111 grad: 0.0680 (0.0698) loss: 0.8286 (0.8322) time: 0.1487 data: 0.0654 max mem: 9377 +Train: [25] [5900/6250] eta: 0:00:52 lr: 0.000111 grad: 0.0664 (0.0698) loss: 0.8379 (0.8323) time: 0.1387 data: 0.0466 max mem: 9377 +Train: [25] [6000/6250] eta: 0:00:37 lr: 0.000111 grad: 0.0666 (0.0698) loss: 0.8372 (0.8323) time: 0.1290 data: 0.0490 max mem: 9377 +Train: [25] [6100/6250] eta: 0:00:22 lr: 0.000111 grad: 0.0711 (0.0698) loss: 0.8307 (0.8323) time: 0.1216 data: 0.0373 max mem: 9377 +Train: [25] [6200/6250] eta: 0:00:07 lr: 0.000111 grad: 0.0697 (0.0697) loss: 0.8297 (0.8323) time: 0.1471 data: 0.0659 max mem: 9377 +Train: [25] [6249/6250] eta: 0:00:00 lr: 0.000111 grad: 0.0659 (0.0697) loss: 0.8353 (0.8323) time: 0.1565 data: 0.0741 max mem: 9377 +Train: [25] Total time: 0:15:45 (0.1512 s / it) +Averaged stats: lr: 0.000111 grad: 0.0659 (0.0697) loss: 0.8353 (0.8323) +Eval (hcp-train-subset): [25] [ 0/62] eta: 0:06:27 loss: 0.8416 (0.8416) time: 6.2515 data: 6.2208 max mem: 9377 +Eval (hcp-train-subset): [25] [61/62] eta: 0:00:00 loss: 0.8363 (0.8385) time: 0.1321 data: 0.1046 max mem: 9377 +Eval (hcp-train-subset): [25] Total time: 0:00:15 (0.2423 s / it) +Averaged stats (hcp-train-subset): loss: 0.8363 (0.8385) +Eval (hcp-val): [25] [ 0/62] eta: 0:04:44 loss: 0.8360 (0.8360) time: 4.5940 data: 4.5588 max mem: 9377 +Eval (hcp-val): [25] [61/62] eta: 0:00:00 loss: 0.8397 (0.8403) time: 0.1270 data: 0.1015 max mem: 9377 +Eval (hcp-val): [25] Total time: 0:00:13 (0.2203 s / it) +Averaged stats (hcp-val): loss: 0.8397 (0.8403) +Eval (nsd-val): [25] [ 0/62] eta: 0:03:55 loss: 0.8001 (0.8001) time: 3.7939 data: 3.7382 max mem: 9377 +Eval (nsd-val): [25] [61/62] eta: 0:00:00 loss: 0.8130 (0.8138) time: 0.1475 data: 0.1222 max mem: 9377 +Eval (nsd-val): [25] Total time: 0:00:13 (0.2230 s / it) +Averaged stats (nsd-val): loss: 0.8130 (0.8138) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +Train: [26] [ 0/6250] eta: 11:25:24 lr: 0.000111 grad: 0.1113 (0.1113) loss: 0.8519 (0.8519) time: 6.5799 data: 6.4647 max mem: 9377 +Train: [26] [ 100/6250] eta: 0:21:46 lr: 0.000111 grad: 0.0759 (0.0845) loss: 0.8269 (0.8281) time: 0.1691 data: 0.0744 max mem: 9377 +Train: [26] [ 200/6250] eta: 0:18:36 lr: 0.000110 grad: 0.0706 (0.0804) loss: 0.8273 (0.8281) time: 0.1201 data: 0.0353 max mem: 9377 +Train: [26] [ 300/6250] eta: 0:17:35 lr: 0.000110 grad: 0.0654 (0.0763) loss: 0.8334 (0.8293) time: 0.1359 data: 0.0485 max mem: 9377 +Train: [26] [ 400/6250] eta: 0:16:32 lr: 0.000110 grad: 0.0652 (0.0745) loss: 0.8257 (0.8295) time: 0.1441 data: 0.0485 max mem: 9377 +Train: [26] [ 500/6250] eta: 0:15:52 lr: 0.000110 grad: 0.0645 (0.0734) loss: 0.8325 (0.8298) time: 0.1557 data: 0.0642 max mem: 9377 +Train: [26] [ 600/6250] eta: 0:15:33 lr: 0.000110 grad: 0.0626 (0.0723) loss: 0.8351 (0.8304) time: 0.1644 data: 0.0779 max mem: 9377 +Train: [26] [ 700/6250] eta: 0:15:08 lr: 0.000110 grad: 0.0640 (0.0715) loss: 0.8306 (0.8308) time: 0.1547 data: 0.0713 max mem: 9377 +Train: [26] [ 800/6250] eta: 0:14:45 lr: 0.000110 grad: 0.0626 (0.0708) loss: 0.8392 (0.8313) time: 0.1634 data: 0.0812 max mem: 9377 +Train: [26] [ 900/6250] eta: 0:14:18 lr: 0.000110 grad: 0.0657 (0.0702) loss: 0.8325 (0.8315) time: 0.1337 data: 0.0529 max mem: 9377 +Train: [26] [1000/6250] eta: 0:13:58 lr: 0.000110 grad: 0.0620 (0.0698) loss: 0.8316 (0.8319) time: 0.1481 data: 0.0675 max mem: 9377 +Train: [26] [1100/6250] eta: 0:13:37 lr: 0.000110 grad: 0.0648 (0.0694) loss: 0.8291 (0.8320) time: 0.1435 data: 0.0582 max mem: 9377 +Train: [26] [1200/6250] eta: 0:13:14 lr: 0.000110 grad: 0.0624 (0.0693) loss: 0.8346 (0.8321) time: 0.1380 data: 0.0501 max mem: 9377 +Train: [26] [1300/6250] eta: 0:12:52 lr: 0.000110 grad: 0.0700 (0.0694) loss: 0.8293 (0.8321) time: 0.1402 data: 0.0576 max mem: 9377 +Train: [26] [1400/6250] eta: 0:12:31 lr: 0.000110 grad: 0.0670 (0.0692) loss: 0.8365 (0.8321) time: 0.1514 data: 0.0762 max mem: 9377 +Train: [26] [1500/6250] eta: 0:12:10 lr: 0.000110 grad: 0.0670 (0.0691) loss: 0.8333 (0.8320) time: 0.1343 data: 0.0546 max mem: 9377 +Train: [26] [1600/6250] eta: 0:11:52 lr: 0.000110 grad: 0.0629 (0.0690) loss: 0.8329 (0.8321) time: 0.1496 data: 0.0708 max mem: 9377 +Train: [26] [1700/6250] eta: 0:11:35 lr: 0.000110 grad: 0.0675 (0.0689) loss: 0.8301 (0.8322) time: 0.1428 data: 0.0603 max mem: 9377 +Train: [26] [1800/6250] eta: 0:11:23 lr: 0.000110 grad: 0.0654 (0.0688) loss: 0.8348 (0.8323) time: 0.1851 data: 0.1060 max mem: 9377 +Train: [26] [1900/6250] eta: 0:11:07 lr: 0.000110 grad: 0.0638 (0.0687) loss: 0.8304 (0.8324) time: 0.1460 data: 0.0630 max mem: 9377 +Train: [26] [2000/6250] eta: 0:10:50 lr: 0.000110 grad: 0.0696 (0.0687) loss: 0.8342 (0.8324) time: 0.1397 data: 0.0566 max mem: 9377 +Train: [26] [2100/6250] eta: 0:10:34 lr: 0.000110 grad: 0.0623 (0.0686) loss: 0.8375 (0.8326) time: 0.1312 data: 0.0481 max mem: 9377 +Train: [26] [2200/6250] eta: 0:10:18 lr: 0.000110 grad: 0.0658 (0.0685) loss: 0.8373 (0.8327) time: 0.1357 data: 0.0537 max mem: 9377 +Train: [26] [2300/6250] eta: 0:10:00 lr: 0.000110 grad: 0.0665 (0.0685) loss: 0.8328 (0.8328) time: 0.1499 data: 0.0636 max mem: 9377 +Train: [26] [2400/6250] eta: 0:09:44 lr: 0.000110 grad: 0.0689 (0.0684) loss: 0.8301 (0.8328) time: 0.1459 data: 0.0556 max mem: 9377 +Train: [26] [2500/6250] eta: 0:09:26 lr: 0.000110 grad: 0.0675 (0.0685) loss: 0.8378 (0.8328) time: 0.1293 data: 0.0446 max mem: 9377 +Train: [26] [2600/6250] eta: 0:09:09 lr: 0.000110 grad: 0.0654 (0.0685) loss: 0.8323 (0.8328) time: 0.1325 data: 0.0441 max mem: 9377 +Train: [26] [2700/6250] eta: 0:08:54 lr: 0.000110 grad: 0.0683 (0.0685) loss: 0.8342 (0.8328) time: 0.1500 data: 0.0688 max mem: 9377 +Train: [26] [2800/6250] eta: 0:08:37 lr: 0.000110 grad: 0.0645 (0.0685) loss: 0.8272 (0.8328) time: 0.1380 data: 0.0541 max mem: 9377 +Train: [26] [2900/6250] eta: 0:08:21 lr: 0.000110 grad: 0.0667 (0.0685) loss: 0.8331 (0.8328) time: 0.1298 data: 0.0439 max mem: 9377 +Train: [26] [3000/6250] eta: 0:08:06 lr: 0.000110 grad: 0.0691 (0.0686) loss: 0.8331 (0.8328) time: 0.1451 data: 0.0526 max mem: 9377 +Train: [26] [3100/6250] eta: 0:07:52 lr: 0.000110 grad: 0.0677 (0.0686) loss: 0.8332 (0.8328) time: 0.1695 data: 0.0950 max mem: 9377 +Train: [26] [3200/6250] eta: 0:07:37 lr: 0.000110 grad: 0.0692 (0.0687) loss: 0.8326 (0.8327) time: 0.1454 data: 0.0677 max mem: 9377 +Train: [26] [3300/6250] eta: 0:07:23 lr: 0.000110 grad: 0.0670 (0.0687) loss: 0.8305 (0.8328) time: 0.1734 data: 0.0951 max mem: 9377 +Train: [26] [3400/6250] eta: 0:07:07 lr: 0.000110 grad: 0.0696 (0.0688) loss: 0.8306 (0.8328) time: 0.1429 data: 0.0611 max mem: 9377 +Train: [26] [3500/6250] eta: 0:06:53 lr: 0.000110 grad: 0.0692 (0.0688) loss: 0.8301 (0.8327) time: 0.1686 data: 0.0915 max mem: 9377 +Train: [26] [3600/6250] eta: 0:06:39 lr: 0.000110 grad: 0.0699 (0.0688) loss: 0.8353 (0.8327) time: 0.1524 data: 0.0719 max mem: 9377 +Train: [26] [3700/6250] eta: 0:06:24 lr: 0.000110 grad: 0.0698 (0.0689) loss: 0.8334 (0.8327) time: 0.1598 data: 0.0756 max mem: 9377 +Train: [26] [3800/6250] eta: 0:06:10 lr: 0.000110 grad: 0.0601 (0.0689) loss: 0.8318 (0.8326) time: 0.1619 data: 0.0851 max mem: 9377 +Train: [26] [3900/6250] eta: 0:05:55 lr: 0.000110 grad: 0.0666 (0.0690) loss: 0.8281 (0.8326) time: 0.1699 data: 0.0886 max mem: 9377 +Train: [26] [4000/6250] eta: 0:05:40 lr: 0.000110 grad: 0.0703 (0.0690) loss: 0.8248 (0.8326) time: 0.1508 data: 0.0749 max mem: 9377 +Train: [26] [4100/6250] eta: 0:05:25 lr: 0.000110 grad: 0.0644 (0.0690) loss: 0.8380 (0.8326) time: 0.1543 data: 0.0727 max mem: 9377 +Train: [26] [4200/6250] eta: 0:05:10 lr: 0.000110 grad: 0.0671 (0.0689) loss: 0.8345 (0.8326) time: 0.1414 data: 0.0633 max mem: 9377 +Train: [26] [4300/6250] eta: 0:04:54 lr: 0.000110 grad: 0.0655 (0.0690) loss: 0.8305 (0.8326) time: 0.1518 data: 0.0751 max mem: 9377 +Train: [26] [4400/6250] eta: 0:04:39 lr: 0.000110 grad: 0.0691 (0.0690) loss: 0.8342 (0.8325) time: 0.1433 data: 0.0624 max mem: 9377 +Train: [26] [4500/6250] eta: 0:04:24 lr: 0.000110 grad: 0.0684 (0.0690) loss: 0.8303 (0.8325) time: 0.1541 data: 0.0722 max mem: 9377 +Train: [26] [4600/6250] eta: 0:04:09 lr: 0.000110 grad: 0.0694 (0.0691) loss: 0.8253 (0.8325) time: 0.1630 data: 0.0744 max mem: 9377 +Train: [26] [4700/6250] eta: 0:03:55 lr: 0.000110 grad: 0.0649 (0.0691) loss: 0.8294 (0.8324) time: 0.1566 data: 0.0749 max mem: 9377 +Train: [26] [4800/6250] eta: 0:03:40 lr: 0.000109 grad: 0.0669 (0.0691) loss: 0.8343 (0.8324) time: 0.1822 data: 0.1060 max mem: 9377 +Train: [26] [4900/6250] eta: 0:03:25 lr: 0.000109 grad: 0.0677 (0.0691) loss: 0.8294 (0.8323) time: 0.1392 data: 0.0559 max mem: 9377 +Train: [26] [5000/6250] eta: 0:03:11 lr: 0.000109 grad: 0.0682 (0.0691) loss: 0.8215 (0.8322) time: 0.1566 data: 0.0740 max mem: 9377 +Train: [26] [5100/6250] eta: 0:02:56 lr: 0.000109 grad: 0.0732 (0.0692) loss: 0.8244 (0.8321) time: 0.1903 data: 0.1171 max mem: 9377 +Train: [26] [5200/6250] eta: 0:02:40 lr: 0.000109 grad: 0.0664 (0.0692) loss: 0.8291 (0.8321) time: 0.1545 data: 0.0708 max mem: 9377 +Train: [26] [5300/6250] eta: 0:02:25 lr: 0.000109 grad: 0.0658 (0.0693) loss: 0.8329 (0.8320) time: 0.2043 data: 0.1336 max mem: 9377 +Train: [26] [5400/6250] eta: 0:02:10 lr: 0.000109 grad: 0.0690 (0.0693) loss: 0.8304 (0.8320) time: 0.1562 data: 0.0764 max mem: 9377 +Train: [26] [5500/6250] eta: 0:01:55 lr: 0.000109 grad: 0.0662 (0.0693) loss: 0.8339 (0.8319) time: 0.1532 data: 0.0703 max mem: 9377 +Train: [26] [5600/6250] eta: 0:01:40 lr: 0.000109 grad: 0.0687 (0.0694) loss: 0.8266 (0.8318) time: 0.1792 data: 0.1005 max mem: 9377 +Train: [26] [5700/6250] eta: 0:01:25 lr: 0.000109 grad: 0.0709 (0.0694) loss: 0.8274 (0.8318) time: 0.1448 data: 0.0700 max mem: 9377 +Train: [26] [5800/6250] eta: 0:01:09 lr: 0.000109 grad: 0.0698 (0.0694) loss: 0.8301 (0.8318) time: 0.1587 data: 0.0801 max mem: 9377 +Train: [26] [5900/6250] eta: 0:00:54 lr: 0.000109 grad: 0.0683 (0.0694) loss: 0.8265 (0.8317) time: 0.1641 data: 0.0870 max mem: 9377 +Train: [26] [6000/6250] eta: 0:00:38 lr: 0.000109 grad: 0.0650 (0.0695) loss: 0.8321 (0.8317) time: 0.1591 data: 0.0751 max mem: 9377 +Train: [26] [6100/6250] eta: 0:00:23 lr: 0.000109 grad: 0.0654 (0.0695) loss: 0.8259 (0.8316) time: 0.1547 data: 0.0736 max mem: 9377 +Train: [26] [6200/6250] eta: 0:00:07 lr: 0.000109 grad: 0.0717 (0.0696) loss: 0.8279 (0.8316) time: 0.1459 data: 0.0628 max mem: 9377 +Train: [26] [6249/6250] eta: 0:00:00 lr: 0.000109 grad: 0.0677 (0.0697) loss: 0.8290 (0.8316) time: 0.1462 data: 0.0623 max mem: 9377 +Train: [26] Total time: 0:16:11 (0.1555 s / it) +Averaged stats: lr: 0.000109 grad: 0.0677 (0.0697) loss: 0.8290 (0.8316) +Eval (hcp-train-subset): [26] [ 0/62] eta: 0:05:22 loss: 0.8388 (0.8388) time: 5.2001 data: 5.1661 max mem: 9377 +Eval (hcp-train-subset): [26] [61/62] eta: 0:00:00 loss: 0.8369 (0.8386) time: 0.1618 data: 0.1367 max mem: 9377 +Eval (hcp-train-subset): [26] Total time: 0:00:15 (0.2482 s / it) +Averaged stats (hcp-train-subset): loss: 0.8369 (0.8386) +Eval (hcp-val): [26] [ 0/62] eta: 0:04:10 loss: 0.8361 (0.8361) time: 4.0356 data: 3.9642 max mem: 9377 +Eval (hcp-val): [26] [61/62] eta: 0:00:00 loss: 0.8391 (0.8394) time: 0.1599 data: 0.1344 max mem: 9377 +Eval (hcp-val): [26] Total time: 0:00:14 (0.2413 s / it) +Averaged stats (hcp-val): loss: 0.8391 (0.8394) +Eval (nsd-val): [26] [ 0/62] eta: 0:04:56 loss: 0.8038 (0.8038) time: 4.7885 data: 4.7572 max mem: 9377 +Eval (nsd-val): [26] [61/62] eta: 0:00:00 loss: 0.8109 (0.8125) time: 0.1592 data: 0.1322 max mem: 9377 +Eval (nsd-val): [26] Total time: 0:00:14 (0.2380 s / it) +Averaged stats (nsd-val): loss: 0.8109 (0.8125) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +Train: [27] [ 0/6250] eta: 8:03:52 lr: 0.000109 grad: 0.1529 (0.1529) loss: 0.8502 (0.8502) time: 4.6452 data: 4.4415 max mem: 9377 +Train: [27] [ 100/6250] eta: 0:23:57 lr: 0.000109 grad: 0.0669 (0.0733) loss: 0.8418 (0.8436) time: 0.1657 data: 0.0717 max mem: 9377 +Train: [27] [ 200/6250] eta: 0:20:38 lr: 0.000109 grad: 0.0610 (0.0693) loss: 0.8418 (0.8424) time: 0.1753 data: 0.0795 max mem: 9377 +Train: [27] [ 300/6250] eta: 0:19:12 lr: 0.000109 grad: 0.0671 (0.0682) loss: 0.8459 (0.8422) time: 0.1654 data: 0.0852 max mem: 9377 +Train: [27] [ 400/6250] eta: 0:18:46 lr: 0.000109 grad: 0.0639 (0.0681) loss: 0.8380 (0.8414) time: 0.1747 data: 0.0876 max mem: 9377 +Train: [27] [ 500/6250] eta: 0:17:59 lr: 0.000109 grad: 0.0606 (0.0681) loss: 0.8406 (0.8409) time: 0.1624 data: 0.0727 max mem: 9377 +Train: [27] [ 600/6250] eta: 0:17:15 lr: 0.000109 grad: 0.0704 (0.0681) loss: 0.8344 (0.8402) time: 0.1711 data: 0.0862 max mem: 9377 +Train: [27] [ 700/6250] eta: 0:16:44 lr: 0.000109 grad: 0.0680 (0.0688) loss: 0.8359 (0.8391) time: 0.1742 data: 0.0883 max mem: 9377 +Train: [27] [ 800/6250] eta: 0:16:08 lr: 0.000109 grad: 0.0727 (0.0693) loss: 0.8360 (0.8384) time: 0.1511 data: 0.0656 max mem: 9377 +Train: [27] [ 900/6250] eta: 0:15:42 lr: 0.000109 grad: 0.0684 (0.0695) loss: 0.8297 (0.8377) time: 0.1704 data: 0.0708 max mem: 9377 +Train: [27] [1000/6250] eta: 0:15:07 lr: 0.000109 grad: 0.0681 (0.0699) loss: 0.8319 (0.8370) time: 0.1577 data: 0.0754 max mem: 9377 +Train: [27] [1100/6250] eta: 0:14:38 lr: 0.000109 grad: 0.0680 (0.0700) loss: 0.8305 (0.8364) time: 0.1669 data: 0.0853 max mem: 9377 +Train: [27] [1200/6250] eta: 0:14:10 lr: 0.000109 grad: 0.0656 (0.0700) loss: 0.8300 (0.8358) time: 0.1561 data: 0.0726 max mem: 9377 +Train: [27] [1300/6250] eta: 0:13:47 lr: 0.000109 grad: 0.0693 (0.0701) loss: 0.8320 (0.8354) time: 0.1454 data: 0.0663 max mem: 9377 +Train: [27] [1400/6250] eta: 0:13:23 lr: 0.000109 grad: 0.0686 (0.0704) loss: 0.8277 (0.8349) time: 0.1620 data: 0.0799 max mem: 9377 +Train: [27] [1500/6250] eta: 0:13:03 lr: 0.000109 grad: 0.0711 (0.0705) loss: 0.8278 (0.8345) time: 0.1869 data: 0.1155 max mem: 9377 +Train: [27] [1600/6250] eta: 0:12:48 lr: 0.000109 grad: 0.0652 (0.0706) loss: 0.8297 (0.8342) time: 0.1773 data: 0.0932 max mem: 9377 +Train: [27] [1700/6250] eta: 0:12:31 lr: 0.000109 grad: 0.0719 (0.0708) loss: 0.8265 (0.8338) time: 0.1654 data: 0.0863 max mem: 9377 +Train: [27] [1800/6250] eta: 0:12:14 lr: 0.000109 grad: 0.0736 (0.0710) loss: 0.8220 (0.8334) time: 0.1622 data: 0.0789 max mem: 9377 +Train: [27] [1900/6250] eta: 0:11:54 lr: 0.000109 grad: 0.0722 (0.0711) loss: 0.8257 (0.8331) time: 0.1407 data: 0.0650 max mem: 9377 +Train: [27] [2000/6250] eta: 0:11:33 lr: 0.000109 grad: 0.0692 (0.0714) loss: 0.8309 (0.8329) time: 0.1437 data: 0.0579 max mem: 9377 +Train: [27] [2100/6250] eta: 0:11:14 lr: 0.000109 grad: 0.0704 (0.0715) loss: 0.8252 (0.8326) time: 0.1523 data: 0.0658 max mem: 9377 +Train: [27] [2200/6250] eta: 0:10:55 lr: 0.000109 grad: 0.0689 (0.0716) loss: 0.8310 (0.8324) time: 0.1413 data: 0.0573 max mem: 9377 +Train: [27] [2300/6250] eta: 0:10:35 lr: 0.000109 grad: 0.0673 (0.0717) loss: 0.8313 (0.8321) time: 0.1383 data: 0.0446 max mem: 9377 +Train: [27] [2400/6250] eta: 0:10:17 lr: 0.000109 grad: 0.0680 (0.0717) loss: 0.8277 (0.8320) time: 0.1619 data: 0.0852 max mem: 9377 +Train: [27] [2500/6250] eta: 0:09:58 lr: 0.000109 grad: 0.0715 (0.0720) loss: 0.8291 (0.8319) time: 0.1527 data: 0.0768 max mem: 9377 +Train: [27] [2600/6250] eta: 0:09:40 lr: 0.000109 grad: 0.0718 (0.0720) loss: 0.8247 (0.8317) time: 0.1275 data: 0.0461 max mem: 9377 +Train: [27] [2700/6250] eta: 0:09:23 lr: 0.000109 grad: 0.0720 (0.0720) loss: 0.8227 (0.8316) time: 0.1478 data: 0.0642 max mem: 9377 +Train: [27] [2800/6250] eta: 0:09:06 lr: 0.000109 grad: 0.0633 (0.0720) loss: 0.8298 (0.8315) time: 0.1578 data: 0.0727 max mem: 9377 +Train: [27] [2900/6250] eta: 0:08:49 lr: 0.000109 grad: 0.0725 (0.0719) loss: 0.8276 (0.8314) time: 0.1195 data: 0.0398 max mem: 9377 +Train: [27] [3000/6250] eta: 0:08:32 lr: 0.000109 grad: 0.0738 (0.0719) loss: 0.8244 (0.8313) time: 0.1437 data: 0.0617 max mem: 9377 +Train: [27] [3100/6250] eta: 0:08:15 lr: 0.000108 grad: 0.0755 (0.0720) loss: 0.8231 (0.8312) time: 0.1508 data: 0.0695 max mem: 9377 +Train: [27] [3200/6250] eta: 0:07:58 lr: 0.000108 grad: 0.0734 (0.0722) loss: 0.8255 (0.8311) time: 0.1494 data: 0.0717 max mem: 9377 +Train: [27] [3300/6250] eta: 0:07:41 lr: 0.000108 grad: 0.0690 (0.0722) loss: 0.8262 (0.8309) time: 0.1454 data: 0.0696 max mem: 9377 +Train: [27] [3400/6250] eta: 0:07:24 lr: 0.000108 grad: 0.0721 (0.0723) loss: 0.8262 (0.8308) time: 0.1516 data: 0.0725 max mem: 9377 +Train: [27] [3500/6250] eta: 0:07:08 lr: 0.000108 grad: 0.0735 (0.0723) loss: 0.8288 (0.8307) time: 0.1560 data: 0.0753 max mem: 9377 +Train: [27] [3600/6250] eta: 0:06:51 lr: 0.000108 grad: 0.0705 (0.0724) loss: 0.8280 (0.8307) time: 0.1281 data: 0.0464 max mem: 9377 +Train: [27] [3700/6250] eta: 0:06:34 lr: 0.000108 grad: 0.0756 (0.0725) loss: 0.8273 (0.8306) time: 0.1386 data: 0.0489 max mem: 9377 +Train: [27] [3800/6250] eta: 0:06:19 lr: 0.000108 grad: 0.0691 (0.0725) loss: 0.8314 (0.8306) time: 0.1497 data: 0.0715 max mem: 9377 +Train: [27] [3900/6250] eta: 0:06:04 lr: 0.000108 grad: 0.0735 (0.0725) loss: 0.8263 (0.8305) time: 0.1610 data: 0.0808 max mem: 9377 +Train: [27] [4000/6250] eta: 0:05:49 lr: 0.000108 grad: 0.0793 (0.0725) loss: 0.8305 (0.8305) time: 0.1779 data: 0.0998 max mem: 9377 +Train: [27] [4100/6250] eta: 0:05:33 lr: 0.000108 grad: 0.0703 (0.0726) loss: 0.8235 (0.8304) time: 0.1545 data: 0.0720 max mem: 9377 +Train: [27] [4200/6250] eta: 0:05:18 lr: 0.000108 grad: 0.0687 (0.0726) loss: 0.8254 (0.8303) time: 0.1745 data: 0.0976 max mem: 9377 +Train: [27] [4300/6250] eta: 0:05:03 lr: 0.000108 grad: 0.0688 (0.0725) loss: 0.8290 (0.8303) time: 0.1535 data: 0.0732 max mem: 9377 +Train: [27] [4400/6250] eta: 0:04:47 lr: 0.000108 grad: 0.0738 (0.0725) loss: 0.8274 (0.8302) time: 0.1613 data: 0.0854 max mem: 9377 +Train: [27] [4500/6250] eta: 0:04:31 lr: 0.000108 grad: 0.0701 (0.0726) loss: 0.8315 (0.8301) time: 0.1360 data: 0.0554 max mem: 9377 +Train: [27] [4600/6250] eta: 0:04:15 lr: 0.000108 grad: 0.0675 (0.0726) loss: 0.8293 (0.8301) time: 0.1514 data: 0.0691 max mem: 9377 +Train: [27] [4700/6250] eta: 0:03:59 lr: 0.000108 grad: 0.0668 (0.0725) loss: 0.8320 (0.8301) time: 0.1194 data: 0.0340 max mem: 9377 +Train: [27] [4800/6250] eta: 0:03:43 lr: 0.000108 grad: 0.0728 (0.0725) loss: 0.8249 (0.8300) time: 0.1293 data: 0.0384 max mem: 9377 +Train: [27] [4900/6250] eta: 0:03:28 lr: 0.000108 grad: 0.0688 (0.0725) loss: 0.8324 (0.8300) time: 0.1641 data: 0.0820 max mem: 9377 +Train: [27] [5000/6250] eta: 0:03:12 lr: 0.000108 grad: 0.0648 (0.0724) loss: 0.8337 (0.8301) time: 0.1323 data: 0.0449 max mem: 9377 +Train: [27] [5100/6250] eta: 0:02:56 lr: 0.000108 grad: 0.0629 (0.0724) loss: 0.8323 (0.8301) time: 0.1302 data: 0.0505 max mem: 9377 +Train: [27] [5200/6250] eta: 0:02:41 lr: 0.000108 grad: 0.0730 (0.0724) loss: 0.8306 (0.8301) time: 0.1564 data: 0.0711 max mem: 9377 +Train: [27] [5300/6250] eta: 0:02:25 lr: 0.000108 grad: 0.0675 (0.0724) loss: 0.8312 (0.8301) time: 0.1369 data: 0.0495 max mem: 9377 +Train: [27] [5400/6250] eta: 0:02:10 lr: 0.000108 grad: 0.0694 (0.0724) loss: 0.8311 (0.8301) time: 0.1325 data: 0.0513 max mem: 9377 +Train: [27] [5500/6250] eta: 0:01:54 lr: 0.000108 grad: 0.0715 (0.0724) loss: 0.8321 (0.8301) time: 0.1348 data: 0.0486 max mem: 9377 +Train: [27] [5600/6250] eta: 0:01:39 lr: 0.000108 grad: 0.0675 (0.0723) loss: 0.8291 (0.8301) time: 0.1452 data: 0.0662 max mem: 9377 +Train: [27] [5700/6250] eta: 0:01:23 lr: 0.000108 grad: 0.0726 (0.0723) loss: 0.8340 (0.8300) time: 0.1288 data: 0.0463 max mem: 9377 +Train: [27] [5800/6250] eta: 0:01:08 lr: 0.000108 grad: 0.0666 (0.0723) loss: 0.8330 (0.8300) time: 0.1381 data: 0.0592 max mem: 9377 +Train: [27] [5900/6250] eta: 0:00:53 lr: 0.000108 grad: 0.0707 (0.0723) loss: 0.8324 (0.8300) time: 0.1483 data: 0.0693 max mem: 9377 +Train: [27] [6000/6250] eta: 0:00:38 lr: 0.000108 grad: 0.0684 (0.0723) loss: 0.8295 (0.8300) time: 0.1186 data: 0.0297 max mem: 9377 +Train: [27] [6100/6250] eta: 0:00:22 lr: 0.000108 grad: 0.0671 (0.0722) loss: 0.8302 (0.8300) time: 0.1539 data: 0.0726 max mem: 9377 +Train: [27] [6200/6250] eta: 0:00:07 lr: 0.000108 grad: 0.0690 (0.0722) loss: 0.8249 (0.8300) time: 0.1574 data: 0.0767 max mem: 9377 +Train: [27] [6249/6250] eta: 0:00:00 lr: 0.000108 grad: 0.0681 (0.0722) loss: 0.8367 (0.8301) time: 0.1675 data: 0.0862 max mem: 9377 +Train: [27] Total time: 0:15:55 (0.1528 s / it) +Averaged stats: lr: 0.000108 grad: 0.0681 (0.0722) loss: 0.8367 (0.8301) +Eval (hcp-train-subset): [27] [ 0/62] eta: 0:03:30 loss: 0.8391 (0.8391) time: 3.4013 data: 3.3334 max mem: 9377 +Eval (hcp-train-subset): [27] [61/62] eta: 0:00:00 loss: 0.8398 (0.8389) time: 0.1383 data: 0.1125 max mem: 9377 +Eval (hcp-train-subset): [27] Total time: 0:00:14 (0.2359 s / it) +Averaged stats (hcp-train-subset): loss: 0.8398 (0.8389) +Eval (hcp-val): [27] [ 0/62] eta: 0:05:13 loss: 0.8366 (0.8366) time: 5.0554 data: 5.0229 max mem: 9377 +Eval (hcp-val): [27] [61/62] eta: 0:00:00 loss: 0.8396 (0.8400) time: 0.1150 data: 0.0898 max mem: 9377 +Eval (hcp-val): [27] Total time: 0:00:13 (0.2108 s / it) +Averaged stats (hcp-val): loss: 0.8396 (0.8400) +Eval (nsd-val): [27] [ 0/62] eta: 0:05:37 loss: 0.8066 (0.8066) time: 5.4475 data: 5.4174 max mem: 9377 +Eval (nsd-val): [27] [61/62] eta: 0:00:00 loss: 0.8134 (0.8136) time: 0.1501 data: 0.1244 max mem: 9377 +Eval (nsd-val): [27] Total time: 0:00:13 (0.2188 s / it) +Averaged stats (nsd-val): loss: 0.8134 (0.8136) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +Train: [28] [ 0/6250] eta: 8:38:33 lr: 0.000108 grad: 0.0546 (0.0546) loss: 0.8459 (0.8459) time: 4.9782 data: 4.7354 max mem: 9377 +Train: [28] [ 100/6250] eta: 0:21:08 lr: 0.000108 grad: 0.0666 (0.0729) loss: 0.8444 (0.8435) time: 0.1420 data: 0.0519 max mem: 9377 +Train: [28] [ 200/6250] eta: 0:18:11 lr: 0.000108 grad: 0.0660 (0.0728) loss: 0.8387 (0.8382) time: 0.1706 data: 0.0743 max mem: 9377 +Train: [28] [ 300/6250] eta: 0:17:23 lr: 0.000108 grad: 0.0664 (0.0735) loss: 0.8345 (0.8353) time: 0.1743 data: 0.0833 max mem: 9377 +Train: [28] [ 400/6250] eta: 0:16:42 lr: 0.000108 grad: 0.0698 (0.0732) loss: 0.8347 (0.8345) time: 0.1692 data: 0.0704 max mem: 9377 +Train: [28] [ 500/6250] eta: 0:16:24 lr: 0.000108 grad: 0.0653 (0.0722) loss: 0.8337 (0.8342) time: 0.1702 data: 0.0783 max mem: 9377 +Train: [28] [ 600/6250] eta: 0:16:09 lr: 0.000108 grad: 0.0659 (0.0717) loss: 0.8345 (0.8341) time: 0.1720 data: 0.0741 max mem: 9377 +Train: [28] [ 700/6250] eta: 0:15:47 lr: 0.000108 grad: 0.0637 (0.0715) loss: 0.8383 (0.8342) time: 0.1478 data: 0.0629 max mem: 9377 +Train: [28] [ 800/6250] eta: 0:15:17 lr: 0.000108 grad: 0.0676 (0.0710) loss: 0.8364 (0.8346) time: 0.1450 data: 0.0580 max mem: 9377 +Train: [28] [ 900/6250] eta: 0:14:50 lr: 0.000108 grad: 0.0630 (0.0706) loss: 0.8303 (0.8349) time: 0.1394 data: 0.0568 max mem: 9377 +Train: [28] [1000/6250] eta: 0:14:27 lr: 0.000108 grad: 0.0618 (0.0701) loss: 0.8396 (0.8350) time: 0.1650 data: 0.0831 max mem: 9377 +Train: [28] [1100/6250] eta: 0:14:03 lr: 0.000108 grad: 0.0632 (0.0697) loss: 0.8423 (0.8351) time: 0.1486 data: 0.0644 max mem: 9377 +Train: [28] [1200/6250] eta: 0:13:38 lr: 0.000108 grad: 0.0649 (0.0697) loss: 0.8368 (0.8351) time: 0.1447 data: 0.0632 max mem: 9377 +Train: [28] [1300/6250] eta: 0:13:25 lr: 0.000107 grad: 0.0682 (0.0697) loss: 0.8281 (0.8351) time: 0.1761 data: 0.0928 max mem: 9377 +Train: [28] [1400/6250] eta: 0:13:08 lr: 0.000107 grad: 0.0665 (0.0696) loss: 0.8391 (0.8350) time: 0.1824 data: 0.0995 max mem: 9377 +Train: [28] [1500/6250] eta: 0:12:50 lr: 0.000107 grad: 0.0683 (0.0695) loss: 0.8352 (0.8351) time: 0.1523 data: 0.0685 max mem: 9377 +Train: [28] [1600/6250] eta: 0:12:32 lr: 0.000107 grad: 0.0658 (0.0695) loss: 0.8353 (0.8351) time: 0.1922 data: 0.1110 max mem: 9377 +Train: [28] [1700/6250] eta: 0:12:12 lr: 0.000107 grad: 0.0649 (0.0694) loss: 0.8310 (0.8351) time: 0.1536 data: 0.0689 max mem: 9377 +Train: [28] [1800/6250] eta: 0:11:53 lr: 0.000107 grad: 0.0753 (0.0694) loss: 0.8353 (0.8350) time: 0.1312 data: 0.0492 max mem: 9377 +Train: [28] [1900/6250] eta: 0:11:34 lr: 0.000107 grad: 0.0670 (0.0695) loss: 0.8307 (0.8349) time: 0.1495 data: 0.0653 max mem: 9377 +Train: [28] [2000/6250] eta: 0:11:15 lr: 0.000107 grad: 0.0681 (0.0698) loss: 0.8302 (0.8347) time: 0.1438 data: 0.0556 max mem: 9377 +Train: [28] [2100/6250] eta: 0:10:56 lr: 0.000107 grad: 0.0686 (0.0698) loss: 0.8340 (0.8345) time: 0.1263 data: 0.0314 max mem: 9377 +Train: [28] [2200/6250] eta: 0:10:38 lr: 0.000107 grad: 0.0657 (0.0699) loss: 0.8310 (0.8344) time: 0.1519 data: 0.0695 max mem: 9377 +Train: [28] [2300/6250] eta: 0:10:20 lr: 0.000107 grad: 0.0648 (0.0699) loss: 0.8338 (0.8344) time: 0.1603 data: 0.0768 max mem: 9377 +Train: [28] [2400/6250] eta: 0:10:02 lr: 0.000107 grad: 0.0674 (0.0699) loss: 0.8329 (0.8343) time: 0.1442 data: 0.0629 max mem: 9377 +Train: [28] [2500/6250] eta: 0:09:45 lr: 0.000107 grad: 0.0694 (0.0698) loss: 0.8324 (0.8343) time: 0.1501 data: 0.0711 max mem: 9377 +Train: [28] [2600/6250] eta: 0:09:28 lr: 0.000107 grad: 0.0743 (0.0700) loss: 0.8352 (0.8343) time: 0.1479 data: 0.0661 max mem: 9377 +Train: [28] [2700/6250] eta: 0:09:11 lr: 0.000107 grad: 0.0712 (0.0701) loss: 0.8309 (0.8342) time: 0.1408 data: 0.0564 max mem: 9377 +Train: [28] [2800/6250] eta: 0:08:54 lr: 0.000107 grad: 0.0725 (0.0701) loss: 0.8310 (0.8342) time: 0.1442 data: 0.0610 max mem: 9377 +Train: [28] [2900/6250] eta: 0:08:37 lr: 0.000107 grad: 0.0703 (0.0703) loss: 0.8313 (0.8341) time: 0.1309 data: 0.0476 max mem: 9377 +Train: [28] [3000/6250] eta: 0:08:19 lr: 0.000107 grad: 0.0671 (0.0704) loss: 0.8326 (0.8340) time: 0.1294 data: 0.0425 max mem: 9377 +Train: [28] [3100/6250] eta: 0:08:03 lr: 0.000107 grad: 0.0694 (0.0704) loss: 0.8293 (0.8339) time: 0.1376 data: 0.0597 max mem: 9377 +Train: [28] [3200/6250] eta: 0:07:46 lr: 0.000107 grad: 0.0757 (0.0706) loss: 0.8298 (0.8338) time: 0.1276 data: 0.0442 max mem: 9377 +Train: [28] [3300/6250] eta: 0:07:30 lr: 0.000107 grad: 0.0686 (0.0706) loss: 0.8301 (0.8337) time: 0.1394 data: 0.0607 max mem: 9377 +Train: [28] [3400/6250] eta: 0:07:14 lr: 0.000107 grad: 0.0683 (0.0707) loss: 0.8328 (0.8336) time: 0.1622 data: 0.0823 max mem: 9377 +Train: [28] [3500/6250] eta: 0:06:57 lr: 0.000107 grad: 0.0704 (0.0708) loss: 0.8294 (0.8335) time: 0.1342 data: 0.0449 max mem: 9377 +Train: [28] [3600/6250] eta: 0:06:41 lr: 0.000107 grad: 0.0731 (0.0708) loss: 0.8294 (0.8334) time: 0.1399 data: 0.0583 max mem: 9377 +Train: [28] [3700/6250] eta: 0:06:25 lr: 0.000107 grad: 0.0700 (0.0709) loss: 0.8258 (0.8333) time: 0.1277 data: 0.0475 max mem: 9377 +Train: [28] [3800/6250] eta: 0:06:10 lr: 0.000107 grad: 0.0747 (0.0709) loss: 0.8277 (0.8331) time: 0.1494 data: 0.0651 max mem: 9377 +Train: [28] [3900/6250] eta: 0:05:54 lr: 0.000107 grad: 0.0645 (0.0710) loss: 0.8306 (0.8331) time: 0.1438 data: 0.0637 max mem: 9377 +Train: [28] [4000/6250] eta: 0:05:38 lr: 0.000107 grad: 0.0719 (0.0710) loss: 0.8268 (0.8330) time: 0.1402 data: 0.0583 max mem: 9377 +Train: [28] [4100/6250] eta: 0:05:23 lr: 0.000107 grad: 0.0709 (0.0710) loss: 0.8310 (0.8329) time: 0.1377 data: 0.0543 max mem: 9377 +Train: [28] [4200/6250] eta: 0:05:08 lr: 0.000107 grad: 0.0710 (0.0711) loss: 0.8273 (0.8328) time: 0.1595 data: 0.0802 max mem: 9377 +Train: [28] [4300/6250] eta: 0:04:52 lr: 0.000107 grad: 0.0682 (0.0711) loss: 0.8306 (0.8327) time: 0.1461 data: 0.0658 max mem: 9377 +Train: [28] [4400/6250] eta: 0:04:38 lr: 0.000107 grad: 0.0716 (0.0711) loss: 0.8302 (0.8327) time: 0.1501 data: 0.0687 max mem: 9377 +Train: [28] [4500/6250] eta: 0:04:23 lr: 0.000107 grad: 0.0683 (0.0711) loss: 0.8303 (0.8326) time: 0.1903 data: 0.1127 max mem: 9377 +Train: [28] [4600/6250] eta: 0:04:08 lr: 0.000107 grad: 0.0710 (0.0712) loss: 0.8287 (0.8326) time: 0.1368 data: 0.0524 max mem: 9377 +Train: [28] [4700/6250] eta: 0:03:54 lr: 0.000107 grad: 0.0666 (0.0711) loss: 0.8266 (0.8325) time: 0.1521 data: 0.0681 max mem: 9377 +Train: [28] [4800/6250] eta: 0:03:39 lr: 0.000107 grad: 0.0713 (0.0712) loss: 0.8316 (0.8324) time: 0.1523 data: 0.0774 max mem: 9377 +Train: [28] [4900/6250] eta: 0:03:24 lr: 0.000107 grad: 0.0673 (0.0712) loss: 0.8305 (0.8324) time: 0.1423 data: 0.0689 max mem: 9377 +Train: [28] [5000/6250] eta: 0:03:09 lr: 0.000107 grad: 0.0686 (0.0712) loss: 0.8322 (0.8323) time: 0.1479 data: 0.0661 max mem: 9377 +Train: [28] [5100/6250] eta: 0:02:54 lr: 0.000107 grad: 0.0725 (0.0712) loss: 0.8263 (0.8322) time: 0.1519 data: 0.0675 max mem: 9377 +Train: [28] [5200/6250] eta: 0:02:38 lr: 0.000107 grad: 0.0687 (0.0712) loss: 0.8256 (0.8322) time: 0.1294 data: 0.0407 max mem: 9377 +Train: [28] [5300/6250] eta: 0:02:23 lr: 0.000107 grad: 0.0655 (0.0712) loss: 0.8335 (0.8322) time: 0.1618 data: 0.0879 max mem: 9377 +Train: [28] [5400/6250] eta: 0:02:08 lr: 0.000107 grad: 0.0670 (0.0711) loss: 0.8302 (0.8322) time: 0.1489 data: 0.0721 max mem: 9377 +Train: [28] [5500/6250] eta: 0:01:53 lr: 0.000107 grad: 0.0686 (0.0712) loss: 0.8316 (0.8322) time: 0.1327 data: 0.0566 max mem: 9377 +Train: [28] [5600/6250] eta: 0:01:38 lr: 0.000106 grad: 0.0690 (0.0712) loss: 0.8353 (0.8322) time: 0.1426 data: 0.0601 max mem: 9377 +Train: [28] [5700/6250] eta: 0:01:22 lr: 0.000106 grad: 0.0670 (0.0712) loss: 0.8359 (0.8322) time: 0.1507 data: 0.0674 max mem: 9377 +Train: [28] [5800/6250] eta: 0:01:07 lr: 0.000106 grad: 0.0650 (0.0712) loss: 0.8319 (0.8321) time: 0.1343 data: 0.0507 max mem: 9377 +Train: [28] [5900/6250] eta: 0:00:52 lr: 0.000106 grad: 0.0694 (0.0713) loss: 0.8331 (0.8321) time: 0.1475 data: 0.0706 max mem: 9377 +Train: [28] [6000/6250] eta: 0:00:37 lr: 0.000106 grad: 0.0694 (0.0713) loss: 0.8278 (0.8321) time: 0.1534 data: 0.0590 max mem: 9377 +Train: [28] [6100/6250] eta: 0:00:22 lr: 0.000106 grad: 0.0729 (0.0713) loss: 0.8326 (0.8321) time: 0.1575 data: 0.0756 max mem: 9377 +Train: [28] [6200/6250] eta: 0:00:07 lr: 0.000106 grad: 0.0688 (0.0713) loss: 0.8285 (0.8321) time: 0.1465 data: 0.0673 max mem: 9377 +Train: [28] [6249/6250] eta: 0:00:00 lr: 0.000106 grad: 0.0696 (0.0713) loss: 0.8326 (0.8321) time: 0.2056 data: 0.0625 max mem: 9377 +Train: [28] Total time: 0:15:47 (0.1515 s / it) +Averaged stats: lr: 0.000106 grad: 0.0696 (0.0713) loss: 0.8326 (0.8321) +Eval (hcp-train-subset): [28] [ 0/62] eta: 0:04:40 loss: 0.8376 (0.8376) time: 4.5179 data: 4.4235 max mem: 9377 +Eval (hcp-train-subset): [28] [61/62] eta: 0:00:00 loss: 0.8365 (0.8375) time: 0.1434 data: 0.1179 max mem: 9377 +Eval (hcp-train-subset): [28] Total time: 0:00:14 (0.2342 s / it) +Averaged stats (hcp-train-subset): loss: 0.8365 (0.8375) +Eval (hcp-val): [28] [ 0/62] eta: 0:04:38 loss: 0.8389 (0.8389) time: 4.4975 data: 4.4548 max mem: 9377 +Eval (hcp-val): [28] [61/62] eta: 0:00:00 loss: 0.8389 (0.8397) time: 0.1357 data: 0.1084 max mem: 9377 +Eval (hcp-val): [28] Total time: 0:00:14 (0.2267 s / it) +Averaged stats (hcp-val): loss: 0.8389 (0.8397) +Eval (nsd-val): [28] [ 0/62] eta: 0:04:31 loss: 0.7996 (0.7996) time: 4.3742 data: 4.3449 max mem: 9377 +Eval (nsd-val): [28] [61/62] eta: 0:00:00 loss: 0.8128 (0.8128) time: 0.1365 data: 0.1112 max mem: 9377 +Eval (nsd-val): [28] Total time: 0:00:13 (0.2139 s / it) +Averaged stats (nsd-val): loss: 0.8128 (0.8128) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +Train: [29] [ 0/6250] eta: 7:37:40 lr: 0.000106 grad: 0.0537 (0.0537) loss: 0.8519 (0.8519) time: 4.3937 data: 4.1760 max mem: 9377 +Train: [29] [ 100/6250] eta: 0:19:55 lr: 0.000106 grad: 0.0713 (0.1031) loss: 0.8292 (0.8341) time: 0.1322 data: 0.0334 max mem: 9377 +Train: [29] [ 200/6250] eta: 0:17:34 lr: 0.000106 grad: 0.0695 (0.0897) loss: 0.8332 (0.8328) time: 0.1805 data: 0.0962 max mem: 9377 +Train: [29] [ 300/6250] eta: 0:16:31 lr: 0.000106 grad: 0.0706 (0.0842) loss: 0.8343 (0.8329) time: 0.1696 data: 0.0877 max mem: 9377 +Train: [29] [ 400/6250] eta: 0:15:44 lr: 0.000106 grad: 0.0689 (0.0810) loss: 0.8282 (0.8324) time: 0.1409 data: 0.0515 max mem: 9377 +Train: [29] [ 500/6250] eta: 0:15:14 lr: 0.000106 grad: 0.0642 (0.0787) loss: 0.8243 (0.8314) time: 0.1339 data: 0.0447 max mem: 9377 +Train: [29] [ 600/6250] eta: 0:14:43 lr: 0.000106 grad: 0.0619 (0.0770) loss: 0.8272 (0.8309) time: 0.1200 data: 0.0289 max mem: 9377 +Train: [29] [ 700/6250] eta: 0:14:20 lr: 0.000106 grad: 0.0684 (0.0757) loss: 0.8354 (0.8309) time: 0.1266 data: 0.0383 max mem: 9377 +Train: [29] [ 800/6250] eta: 0:14:03 lr: 0.000106 grad: 0.0675 (0.0753) loss: 0.8309 (0.8309) time: 0.1657 data: 0.0813 max mem: 9377 +Train: [29] [ 900/6250] eta: 0:13:46 lr: 0.000106 grad: 0.0665 (0.0750) loss: 0.8303 (0.8310) time: 0.1681 data: 0.0777 max mem: 9377 +Train: [29] [1000/6250] eta: 0:13:39 lr: 0.000106 grad: 0.0634 (0.0743) loss: 0.8341 (0.8311) time: 0.1711 data: 0.0790 max mem: 9377 +Train: [29] [1100/6250] eta: 0:13:30 lr: 0.000106 grad: 0.0706 (0.0741) loss: 0.8273 (0.8309) time: 0.2004 data: 0.1117 max mem: 9377 +Train: [29] [1200/6250] eta: 0:13:14 lr: 0.000106 grad: 0.0704 (0.0739) loss: 0.8316 (0.8307) time: 0.1471 data: 0.0578 max mem: 9377 +Train: [29] [1300/6250] eta: 0:13:02 lr: 0.000106 grad: 0.0680 (0.0739) loss: 0.8287 (0.8305) time: 0.1704 data: 0.0917 max mem: 9377 +Train: [29] [1400/6250] eta: 0:12:43 lr: 0.000106 grad: 0.0702 (0.0738) loss: 0.8338 (0.8305) time: 0.1508 data: 0.0617 max mem: 9377 +Train: [29] [1500/6250] eta: 0:12:31 lr: 0.000106 grad: 0.0705 (0.0737) loss: 0.8297 (0.8304) time: 0.1561 data: 0.0721 max mem: 9377 +Train: [29] [1600/6250] eta: 0:12:16 lr: 0.000106 grad: 0.0719 (0.0737) loss: 0.8258 (0.8302) time: 0.1506 data: 0.0628 max mem: 9377 +Train: [29] [1700/6250] eta: 0:12:00 lr: 0.000106 grad: 0.0660 (0.0737) loss: 0.8302 (0.8301) time: 0.1751 data: 0.0827 max mem: 9377 +Train: [29] [1800/6250] eta: 0:11:42 lr: 0.000106 grad: 0.0696 (0.0736) loss: 0.8304 (0.8301) time: 0.1351 data: 0.0434 max mem: 9377 +Train: [29] [1900/6250] eta: 0:11:22 lr: 0.000106 grad: 0.0664 (0.0734) loss: 0.8290 (0.8300) time: 0.1240 data: 0.0417 max mem: 9377 +Train: [29] [2000/6250] eta: 0:11:04 lr: 0.000106 grad: 0.0661 (0.0732) loss: 0.8322 (0.8301) time: 0.1371 data: 0.0473 max mem: 9377 +Train: [29] [2100/6250] eta: 0:10:46 lr: 0.000106 grad: 0.0686 (0.0731) loss: 0.8347 (0.8302) time: 0.1604 data: 0.0789 max mem: 9377 +Train: [29] [2200/6250] eta: 0:10:29 lr: 0.000106 grad: 0.0665 (0.0729) loss: 0.8348 (0.8302) time: 0.1525 data: 0.0540 max mem: 9377 +Train: [29] [2300/6250] eta: 0:10:11 lr: 0.000106 grad: 0.0707 (0.0728) loss: 0.8264 (0.8301) time: 0.1293 data: 0.0431 max mem: 9377 +Train: [29] [2400/6250] eta: 0:09:53 lr: 0.000106 grad: 0.0718 (0.0727) loss: 0.8282 (0.8301) time: 0.1548 data: 0.0753 max mem: 9377 +Train: [29] [2500/6250] eta: 0:09:37 lr: 0.000106 grad: 0.0674 (0.0725) loss: 0.8326 (0.8301) time: 0.1583 data: 0.0804 max mem: 9377 +Train: [29] [2600/6250] eta: 0:09:19 lr: 0.000106 grad: 0.0692 (0.0725) loss: 0.8309 (0.8301) time: 0.1346 data: 0.0539 max mem: 9377 +Train: [29] [2700/6250] eta: 0:09:02 lr: 0.000106 grad: 0.0689 (0.0726) loss: 0.8293 (0.8301) time: 0.1314 data: 0.0504 max mem: 9377 +Train: [29] [2800/6250] eta: 0:08:46 lr: 0.000106 grad: 0.0737 (0.0726) loss: 0.8301 (0.8301) time: 0.1442 data: 0.0634 max mem: 9377 +Train: [29] [2900/6250] eta: 0:08:30 lr: 0.000106 grad: 0.0698 (0.0724) loss: 0.8293 (0.8302) time: 0.1388 data: 0.0587 max mem: 9377 +Train: [29] [3000/6250] eta: 0:08:14 lr: 0.000106 grad: 0.0687 (0.0723) loss: 0.8320 (0.8301) time: 0.1379 data: 0.0524 max mem: 9377 +Train: [29] [3100/6250] eta: 0:07:58 lr: 0.000106 grad: 0.0704 (0.0723) loss: 0.8292 (0.8301) time: 0.1330 data: 0.0447 max mem: 9377 +Train: [29] [3200/6250] eta: 0:07:42 lr: 0.000106 grad: 0.0736 (0.0723) loss: 0.8290 (0.8301) time: 0.1286 data: 0.0469 max mem: 9377 +Train: [29] [3300/6250] eta: 0:07:26 lr: 0.000106 grad: 0.0710 (0.0723) loss: 0.8268 (0.8300) time: 0.1429 data: 0.0613 max mem: 9377 +Train: [29] [3400/6250] eta: 0:07:10 lr: 0.000106 grad: 0.0697 (0.0723) loss: 0.8289 (0.8300) time: 0.1328 data: 0.0464 max mem: 9377 +Train: [29] [3500/6250] eta: 0:06:54 lr: 0.000105 grad: 0.0666 (0.0723) loss: 0.8305 (0.8299) time: 0.1465 data: 0.0678 max mem: 9377 +Train: [29] [3600/6250] eta: 0:06:39 lr: 0.000105 grad: 0.0674 (0.0723) loss: 0.8265 (0.8298) time: 0.1362 data: 0.0485 max mem: 9377 +Train: [29] [3700/6250] eta: 0:06:23 lr: 0.000105 grad: 0.0657 (0.0723) loss: 0.8309 (0.8297) time: 0.1213 data: 0.0361 max mem: 9377 +Train: [29] [3800/6250] eta: 0:06:07 lr: 0.000105 grad: 0.0708 (0.0723) loss: 0.8275 (0.8297) time: 0.1206 data: 0.0364 max mem: 9377 +Train: [29] [3900/6250] eta: 0:05:52 lr: 0.000105 grad: 0.0684 (0.0723) loss: 0.8253 (0.8297) time: 0.1387 data: 0.0560 max mem: 9377 +Train: [29] [4000/6250] eta: 0:05:36 lr: 0.000105 grad: 0.0731 (0.0724) loss: 0.8253 (0.8297) time: 0.1273 data: 0.0446 max mem: 9377 +Train: [29] [4100/6250] eta: 0:05:21 lr: 0.000105 grad: 0.0782 (0.0724) loss: 0.8248 (0.8296) time: 0.1695 data: 0.0904 max mem: 9377 +Train: [29] [4200/6250] eta: 0:05:05 lr: 0.000105 grad: 0.0703 (0.0725) loss: 0.8262 (0.8296) time: 0.1591 data: 0.0779 max mem: 9377 +Train: [29] [4300/6250] eta: 0:04:50 lr: 0.000105 grad: 0.0710 (0.0725) loss: 0.8242 (0.8295) time: 0.1377 data: 0.0551 max mem: 9377 +Train: [29] [4400/6250] eta: 0:04:35 lr: 0.000105 grad: 0.0719 (0.0725) loss: 0.8313 (0.8295) time: 0.1623 data: 0.0811 max mem: 9377 +Train: [29] [4500/6250] eta: 0:04:19 lr: 0.000105 grad: 0.0719 (0.0725) loss: 0.8239 (0.8295) time: 0.1309 data: 0.0457 max mem: 9377 +Train: [29] [4600/6250] eta: 0:04:05 lr: 0.000105 grad: 0.0697 (0.0726) loss: 0.8281 (0.8294) time: 0.1508 data: 0.0603 max mem: 9377 +Train: [29] [4700/6250] eta: 0:03:49 lr: 0.000105 grad: 0.0683 (0.0726) loss: 0.8342 (0.8294) time: 0.1347 data: 0.0459 max mem: 9377 +Train: [29] [4800/6250] eta: 0:03:34 lr: 0.000105 grad: 0.0697 (0.0727) loss: 0.8267 (0.8294) time: 0.1508 data: 0.0665 max mem: 9377 +Train: [29] [4900/6250] eta: 0:03:19 lr: 0.000105 grad: 0.0733 (0.0728) loss: 0.8283 (0.8294) time: 0.1521 data: 0.0678 max mem: 9377 +Train: [29] [5000/6250] eta: 0:03:05 lr: 0.000105 grad: 0.0721 (0.0728) loss: 0.8263 (0.8293) time: 0.1721 data: 0.0868 max mem: 9377 +Train: [29] [5100/6250] eta: 0:02:50 lr: 0.000105 grad: 0.0705 (0.0728) loss: 0.8270 (0.8293) time: 0.1449 data: 0.0623 max mem: 9377 +Train: [29] [5200/6250] eta: 0:02:35 lr: 0.000105 grad: 0.0760 (0.0728) loss: 0.8273 (0.8292) time: 0.1503 data: 0.0715 max mem: 9377 +Train: [29] [5300/6250] eta: 0:02:20 lr: 0.000105 grad: 0.0698 (0.0728) loss: 0.8318 (0.8292) time: 0.1345 data: 0.0413 max mem: 9377 +Train: [29] [5400/6250] eta: 0:02:06 lr: 0.000105 grad: 0.0693 (0.0728) loss: 0.8271 (0.8292) time: 0.1386 data: 0.0616 max mem: 9377 +Train: [29] [5500/6250] eta: 0:01:51 lr: 0.000105 grad: 0.0714 (0.0728) loss: 0.8297 (0.8292) time: 0.1499 data: 0.0703 max mem: 9377 +Train: [29] [5600/6250] eta: 0:01:36 lr: 0.000105 grad: 0.0672 (0.0728) loss: 0.8298 (0.8292) time: 0.1285 data: 0.0447 max mem: 9377 +Train: [29] [5700/6250] eta: 0:01:21 lr: 0.000105 grad: 0.0722 (0.0728) loss: 0.8312 (0.8292) time: 0.1477 data: 0.0593 max mem: 9377 +Train: [29] [5800/6250] eta: 0:01:06 lr: 0.000105 grad: 0.0717 (0.0727) loss: 0.8266 (0.8292) time: 0.1659 data: 0.0788 max mem: 9377 +Train: [29] [5900/6250] eta: 0:00:51 lr: 0.000105 grad: 0.0715 (0.0727) loss: 0.8304 (0.8292) time: 0.1419 data: 0.0628 max mem: 9377 +Train: [29] [6000/6250] eta: 0:00:37 lr: 0.000105 grad: 0.0716 (0.0727) loss: 0.8365 (0.8293) time: 0.1445 data: 0.0653 max mem: 9377 +Train: [29] [6100/6250] eta: 0:00:22 lr: 0.000105 grad: 0.0669 (0.0727) loss: 0.8332 (0.8293) time: 0.1497 data: 0.0694 max mem: 9377 +Train: [29] [6200/6250] eta: 0:00:07 lr: 0.000105 grad: 0.0658 (0.0726) loss: 0.8280 (0.8293) time: 0.1278 data: 0.0435 max mem: 9377 +Train: [29] [6249/6250] eta: 0:00:00 lr: 0.000105 grad: 0.0672 (0.0726) loss: 0.8293 (0.8293) time: 0.1439 data: 0.0658 max mem: 9377 +Train: [29] Total time: 0:15:32 (0.1492 s / it) +Averaged stats: lr: 0.000105 grad: 0.0672 (0.0726) loss: 0.8293 (0.8293) +Eval (hcp-train-subset): [29] [ 0/62] eta: 0:06:21 loss: 0.8393 (0.8393) time: 6.1496 data: 6.1173 max mem: 9377 +Eval (hcp-train-subset): [29] [61/62] eta: 0:00:00 loss: 0.8385 (0.8385) time: 0.1172 data: 0.0920 max mem: 9377 +Eval (hcp-train-subset): [29] Total time: 0:00:14 (0.2339 s / it) +Averaged stats (hcp-train-subset): loss: 0.8385 (0.8385) +Making plots (hcp-train-subset): example=8 +Eval (hcp-val): [29] [ 0/62] eta: 0:04:55 loss: 0.8378 (0.8378) time: 4.7735 data: 4.7421 max mem: 9377 +Eval (hcp-val): [29] [61/62] eta: 0:00:00 loss: 0.8375 (0.8392) time: 0.1420 data: 0.1166 max mem: 9377 +Eval (hcp-val): [29] Total time: 0:00:14 (0.2258 s / it) +Averaged stats (hcp-val): loss: 0.8375 (0.8392) +Making plots (hcp-val): example=37 +Eval (nsd-val): [29] [ 0/62] eta: 0:04:40 loss: 0.8033 (0.8033) time: 4.5177 data: 4.4837 max mem: 9377 +Eval (nsd-val): [29] [61/62] eta: 0:00:00 loss: 0.8093 (0.8116) time: 0.1204 data: 0.0921 max mem: 9377 +Eval (nsd-val): [29] Total time: 0:00:13 (0.2124 s / it) +Averaged stats (nsd-val): loss: 0.8093 (0.8116) +Making plots (nsd-val): example=47 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-00029.pth +Train: [30] [ 0/6250] eta: 6:38:32 lr: 0.000105 grad: 0.0627 (0.0627) loss: 0.8616 (0.8616) time: 3.8260 data: 3.5805 max mem: 9377 +Train: [30] [ 100/6250] eta: 0:19:32 lr: 0.000105 grad: 0.0661 (0.0943) loss: 0.8434 (0.8361) time: 0.1327 data: 0.0330 max mem: 9377 +Train: [30] [ 200/6250] eta: 0:17:05 lr: 0.000105 grad: 0.0658 (0.0826) loss: 0.8468 (0.8359) time: 0.1378 data: 0.0497 max mem: 9377 +Train: [30] [ 300/6250] eta: 0:16:05 lr: 0.000105 grad: 0.0662 (0.0806) loss: 0.8373 (0.8336) time: 0.1567 data: 0.0743 max mem: 9377 +Train: [30] [ 400/6250] eta: 0:15:20 lr: 0.000105 grad: 0.0658 (0.0788) loss: 0.8363 (0.8329) time: 0.1501 data: 0.0617 max mem: 9377 +Train: [30] [ 500/6250] eta: 0:14:51 lr: 0.000105 grad: 0.0659 (0.0775) loss: 0.8363 (0.8325) time: 0.1573 data: 0.0642 max mem: 9377 +Train: [30] [ 600/6250] eta: 0:14:15 lr: 0.000105 grad: 0.0688 (0.0764) loss: 0.8384 (0.8325) time: 0.1331 data: 0.0438 max mem: 9377 +Train: [30] [ 700/6250] eta: 0:13:48 lr: 0.000105 grad: 0.0672 (0.0756) loss: 0.8324 (0.8324) time: 0.1166 data: 0.0259 max mem: 9377 +Train: [30] [ 800/6250] eta: 0:13:38 lr: 0.000105 grad: 0.0655 (0.0748) loss: 0.8335 (0.8324) time: 0.1632 data: 0.0825 max mem: 9377 +Train: [30] [ 900/6250] eta: 0:13:22 lr: 0.000105 grad: 0.0714 (0.0743) loss: 0.8290 (0.8325) time: 0.1658 data: 0.0888 max mem: 9377 +Train: [30] [1000/6250] eta: 0:13:04 lr: 0.000105 grad: 0.0663 (0.0738) loss: 0.8308 (0.8326) time: 0.1480 data: 0.0693 max mem: 9377 +Train: [30] [1100/6250] eta: 0:12:45 lr: 0.000105 grad: 0.0666 (0.0735) loss: 0.8300 (0.8325) time: 0.1403 data: 0.0541 max mem: 9377 +Train: [30] [1200/6250] eta: 0:12:36 lr: 0.000105 grad: 0.0711 (0.0734) loss: 0.8343 (0.8324) time: 0.1578 data: 0.0749 max mem: 9377 +Train: [30] [1300/6250] eta: 0:12:24 lr: 0.000105 grad: 0.0635 (0.0731) loss: 0.8347 (0.8323) time: 0.1669 data: 0.0787 max mem: 9377 +Train: [30] [1400/6250] eta: 0:12:08 lr: 0.000104 grad: 0.0678 (0.0728) loss: 0.8350 (0.8323) time: 0.1511 data: 0.0635 max mem: 9377 +Train: [30] [1500/6250] eta: 0:11:51 lr: 0.000104 grad: 0.0676 (0.0727) loss: 0.8246 (0.8321) time: 0.1553 data: 0.0743 max mem: 9377 +Train: [30] [1600/6250] eta: 0:11:34 lr: 0.000104 grad: 0.0777 (0.0728) loss: 0.8305 (0.8318) time: 0.1497 data: 0.0699 max mem: 9377 +Train: [30] [1700/6250] eta: 0:11:21 lr: 0.000104 grad: 0.0684 (0.0727) loss: 0.8291 (0.8317) time: 0.1555 data: 0.0766 max mem: 9377 +Train: [30] [1800/6250] eta: 0:11:05 lr: 0.000104 grad: 0.0717 (0.0726) loss: 0.8265 (0.8315) time: 0.1429 data: 0.0599 max mem: 9377 +Train: [30] [1900/6250] eta: 0:10:49 lr: 0.000104 grad: 0.0744 (0.0727) loss: 0.8254 (0.8314) time: 0.1260 data: 0.0415 max mem: 9377 +Train: [30] [2000/6250] eta: 0:10:35 lr: 0.000104 grad: 0.0718 (0.0727) loss: 0.8254 (0.8312) time: 0.1479 data: 0.0623 max mem: 9377 +Train: [30] [2100/6250] eta: 0:10:19 lr: 0.000104 grad: 0.0676 (0.0727) loss: 0.8250 (0.8310) time: 0.1529 data: 0.0760 max mem: 9377 +Train: [30] [2200/6250] eta: 0:10:04 lr: 0.000104 grad: 0.0691 (0.0726) loss: 0.8310 (0.8308) time: 0.1637 data: 0.0841 max mem: 9377 +Train: [30] [2300/6250] eta: 0:09:48 lr: 0.000104 grad: 0.0721 (0.0727) loss: 0.8272 (0.8307) time: 0.1530 data: 0.0698 max mem: 9377 +Train: [30] [2400/6250] eta: 0:09:31 lr: 0.000104 grad: 0.0733 (0.0728) loss: 0.8237 (0.8305) time: 0.1131 data: 0.0260 max mem: 9377 +Train: [30] [2500/6250] eta: 0:09:16 lr: 0.000104 grad: 0.0714 (0.0729) loss: 0.8311 (0.8304) time: 0.1510 data: 0.0694 max mem: 9377 +Train: [30] [2600/6250] eta: 0:09:00 lr: 0.000104 grad: 0.0750 (0.0730) loss: 0.8284 (0.8303) time: 0.1347 data: 0.0528 max mem: 9377 +Train: [30] [2700/6250] eta: 0:08:45 lr: 0.000104 grad: 0.0719 (0.0731) loss: 0.8239 (0.8302) time: 0.1379 data: 0.0602 max mem: 9377 +Train: [30] [2800/6250] eta: 0:08:29 lr: 0.000104 grad: 0.0691 (0.0732) loss: 0.8278 (0.8301) time: 0.1377 data: 0.0588 max mem: 9377 +Train: [30] [2900/6250] eta: 0:08:14 lr: 0.000104 grad: 0.0748 (0.0733) loss: 0.8236 (0.8299) time: 0.1598 data: 0.0823 max mem: 9377 +Train: [30] [3000/6250] eta: 0:07:59 lr: 0.000104 grad: 0.0716 (0.0733) loss: 0.8258 (0.8298) time: 0.1465 data: 0.0614 max mem: 9377 +Train: [30] [3100/6250] eta: 0:07:44 lr: 0.000104 grad: 0.0699 (0.0733) loss: 0.8322 (0.8297) time: 0.1345 data: 0.0520 max mem: 9377 +Train: [30] [3200/6250] eta: 0:07:30 lr: 0.000104 grad: 0.0723 (0.0732) loss: 0.8277 (0.8296) time: 0.1502 data: 0.0651 max mem: 9377 +Train: [30] [3300/6250] eta: 0:07:15 lr: 0.000104 grad: 0.0748 (0.0733) loss: 0.8248 (0.8295) time: 0.1398 data: 0.0605 max mem: 9377 +Train: [30] [3400/6250] eta: 0:07:00 lr: 0.000104 grad: 0.0741 (0.0733) loss: 0.8320 (0.8294) time: 0.1357 data: 0.0520 max mem: 9377 +Train: [30] [3500/6250] eta: 0:06:46 lr: 0.000104 grad: 0.0689 (0.0733) loss: 0.8281 (0.8294) time: 0.1655 data: 0.0865 max mem: 9377 +Train: [30] [3600/6250] eta: 0:06:31 lr: 0.000104 grad: 0.0727 (0.0733) loss: 0.8303 (0.8294) time: 0.1503 data: 0.0679 max mem: 9377 +Train: [30] [3700/6250] eta: 0:06:16 lr: 0.000104 grad: 0.0718 (0.0733) loss: 0.8334 (0.8294) time: 0.1418 data: 0.0571 max mem: 9377 +Train: [30] [3800/6250] eta: 0:06:01 lr: 0.000104 grad: 0.0701 (0.0733) loss: 0.8317 (0.8294) time: 0.1313 data: 0.0469 max mem: 9377 +Train: [30] [3900/6250] eta: 0:05:46 lr: 0.000104 grad: 0.0718 (0.0733) loss: 0.8251 (0.8294) time: 0.1349 data: 0.0477 max mem: 9377 +Train: [30] [4000/6250] eta: 0:05:31 lr: 0.000104 grad: 0.0714 (0.0733) loss: 0.8255 (0.8294) time: 0.1336 data: 0.0553 max mem: 9377 +Train: [30] [4100/6250] eta: 0:05:16 lr: 0.000104 grad: 0.0769 (0.0734) loss: 0.8300 (0.8293) time: 0.1583 data: 0.0709 max mem: 9377 +Train: [30] [4200/6250] eta: 0:05:01 lr: 0.000104 grad: 0.0734 (0.0735) loss: 0.8363 (0.8293) time: 0.1416 data: 0.0613 max mem: 9377 +Train: [30] [4300/6250] eta: 0:04:46 lr: 0.000104 grad: 0.0678 (0.0736) loss: 0.8226 (0.8293) time: 0.1426 data: 0.0620 max mem: 9377 +Train: [30] [4400/6250] eta: 0:04:31 lr: 0.000104 grad: 0.0724 (0.0737) loss: 0.8272 (0.8293) time: 0.1305 data: 0.0462 max mem: 9377 +Train: [30] [4500/6250] eta: 0:04:16 lr: 0.000104 grad: 0.0753 (0.0738) loss: 0.8281 (0.8292) time: 0.1387 data: 0.0463 max mem: 9377 +Train: [30] [4600/6250] eta: 0:04:01 lr: 0.000104 grad: 0.0751 (0.0740) loss: 0.8308 (0.8292) time: 0.1128 data: 0.0337 max mem: 9377 +Train: [30] [4700/6250] eta: 0:03:46 lr: 0.000104 grad: 0.0750 (0.0740) loss: 0.8238 (0.8291) time: 0.1406 data: 0.0584 max mem: 9377 +Train: [30] [4800/6250] eta: 0:03:32 lr: 0.000104 grad: 0.0724 (0.0741) loss: 0.8238 (0.8291) time: 0.1691 data: 0.0872 max mem: 9377 +Train: [30] [4900/6250] eta: 0:03:17 lr: 0.000104 grad: 0.0730 (0.0741) loss: 0.8296 (0.8290) time: 0.1272 data: 0.0477 max mem: 9377 +Train: [30] [5000/6250] eta: 0:03:02 lr: 0.000104 grad: 0.0771 (0.0741) loss: 0.8269 (0.8290) time: 0.1377 data: 0.0478 max mem: 9377 +Train: [30] [5100/6250] eta: 0:02:48 lr: 0.000104 grad: 0.0763 (0.0741) loss: 0.8270 (0.8289) time: 0.1462 data: 0.0641 max mem: 9377 +Train: [30] [5200/6250] eta: 0:02:33 lr: 0.000104 grad: 0.0725 (0.0741) loss: 0.8262 (0.8289) time: 0.1533 data: 0.0779 max mem: 9377 +Train: [30] [5300/6250] eta: 0:02:18 lr: 0.000104 grad: 0.0694 (0.0742) loss: 0.8282 (0.8288) time: 0.1470 data: 0.0608 max mem: 9377 +Train: [30] [5400/6250] eta: 0:02:04 lr: 0.000103 grad: 0.0722 (0.0742) loss: 0.8320 (0.8289) time: 0.1572 data: 0.0575 max mem: 9377 +Train: [30] [5500/6250] eta: 0:01:49 lr: 0.000103 grad: 0.0727 (0.0742) loss: 0.8240 (0.8288) time: 0.1672 data: 0.0922 max mem: 9377 +Train: [30] [5600/6250] eta: 0:01:35 lr: 0.000103 grad: 0.0714 (0.0742) loss: 0.8264 (0.8288) time: 0.1657 data: 0.0848 max mem: 9377 +Train: [30] [5700/6250] eta: 0:01:20 lr: 0.000103 grad: 0.0726 (0.0741) loss: 0.8257 (0.8288) time: 0.1487 data: 0.0622 max mem: 9377 +Train: [30] [5800/6250] eta: 0:01:06 lr: 0.000103 grad: 0.0814 (0.0742) loss: 0.8250 (0.8287) time: 0.1665 data: 0.0884 max mem: 9377 +Train: [30] [5900/6250] eta: 0:00:51 lr: 0.000103 grad: 0.0686 (0.0742) loss: 0.8260 (0.8287) time: 0.1420 data: 0.0612 max mem: 9377 +Train: [30] [6000/6250] eta: 0:00:36 lr: 0.000103 grad: 0.0776 (0.0742) loss: 0.8263 (0.8287) time: 0.1299 data: 0.0551 max mem: 9377 +Train: [30] [6100/6250] eta: 0:00:22 lr: 0.000103 grad: 0.0736 (0.0742) loss: 0.8225 (0.8286) time: 0.1475 data: 0.0669 max mem: 9377 +Train: [30] [6200/6250] eta: 0:00:07 lr: 0.000103 grad: 0.0693 (0.0742) loss: 0.8351 (0.8286) time: 0.1453 data: 0.0610 max mem: 9377 +Train: [30] [6249/6250] eta: 0:00:00 lr: 0.000103 grad: 0.0685 (0.0742) loss: 0.8297 (0.8286) time: 0.1324 data: 0.0487 max mem: 9377 +Train: [30] Total time: 0:15:27 (0.1484 s / it) +Averaged stats: lr: 0.000103 grad: 0.0685 (0.0742) loss: 0.8297 (0.8286) +Eval (hcp-train-subset): [30] [ 0/62] eta: 0:04:16 loss: 0.8405 (0.8405) time: 4.1329 data: 4.0716 max mem: 9377 +Eval (hcp-train-subset): [30] [61/62] eta: 0:00:00 loss: 0.8353 (0.8375) time: 0.1245 data: 0.0974 max mem: 9377 +Eval (hcp-train-subset): [30] Total time: 0:00:13 (0.2199 s / it) +Averaged stats (hcp-train-subset): loss: 0.8353 (0.8375) +Eval (hcp-val): [30] [ 0/62] eta: 0:05:06 loss: 0.8383 (0.8383) time: 4.9370 data: 4.9058 max mem: 9377 +Eval (hcp-val): [30] [61/62] eta: 0:00:00 loss: 0.8374 (0.8390) time: 0.1294 data: 0.1037 max mem: 9377 +Eval (hcp-val): [30] Total time: 0:00:13 (0.2158 s / it) +Averaged stats (hcp-val): loss: 0.8374 (0.8390) +Eval (nsd-val): [30] [ 0/62] eta: 0:03:20 loss: 0.8003 (0.8003) time: 3.2280 data: 3.1487 max mem: 9377 +Eval (nsd-val): [30] [61/62] eta: 0:00:00 loss: 0.8109 (0.8109) time: 0.1230 data: 0.0962 max mem: 9377 +Eval (nsd-val): [30] Total time: 0:00:13 (0.2125 s / it) +Averaged stats (nsd-val): loss: 0.8109 (0.8109) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +Train: [31] [ 0/6250] eta: 9:40:24 lr: 0.000103 grad: 0.1365 (0.1365) loss: 0.8027 (0.8027) time: 5.5719 data: 5.4751 max mem: 9377 +Train: [31] [ 100/6250] eta: 0:19:30 lr: 0.000103 grad: 0.0689 (0.0791) loss: 0.8378 (0.8410) time: 0.1419 data: 0.0608 max mem: 9377 +Train: [31] [ 200/6250] eta: 0:16:57 lr: 0.000103 grad: 0.0721 (0.0768) loss: 0.8298 (0.8352) time: 0.1436 data: 0.0554 max mem: 9377 +Train: [31] [ 300/6250] eta: 0:15:51 lr: 0.000103 grad: 0.0675 (0.0753) loss: 0.8326 (0.8324) time: 0.1428 data: 0.0497 max mem: 9377 +Train: [31] [ 400/6250] eta: 0:15:04 lr: 0.000103 grad: 0.0696 (0.0746) loss: 0.8329 (0.8322) time: 0.1287 data: 0.0328 max mem: 9377 +Train: [31] [ 500/6250] eta: 0:14:49 lr: 0.000103 grad: 0.0653 (0.0740) loss: 0.8341 (0.8318) time: 0.1410 data: 0.0580 max mem: 9377 +Train: [31] [ 600/6250] eta: 0:14:25 lr: 0.000103 grad: 0.0652 (0.0736) loss: 0.8298 (0.8312) time: 0.1472 data: 0.0680 max mem: 9377 +Train: [31] [ 700/6250] eta: 0:14:04 lr: 0.000103 grad: 0.0669 (0.0735) loss: 0.8304 (0.8309) time: 0.1391 data: 0.0512 max mem: 9377 +Train: [31] [ 800/6250] eta: 0:13:48 lr: 0.000103 grad: 0.0699 (0.0734) loss: 0.8283 (0.8307) time: 0.1278 data: 0.0425 max mem: 9377 +Train: [31] [ 900/6250] eta: 0:13:30 lr: 0.000103 grad: 0.0720 (0.0735) loss: 0.8221 (0.8305) time: 0.1400 data: 0.0502 max mem: 9377 +Train: [31] [1000/6250] eta: 0:13:10 lr: 0.000103 grad: 0.0737 (0.0733) loss: 0.8294 (0.8304) time: 0.1500 data: 0.0697 max mem: 9377 +Train: [31] [1100/6250] eta: 0:12:48 lr: 0.000103 grad: 0.0695 (0.0735) loss: 0.8325 (0.8303) time: 0.1424 data: 0.0542 max mem: 9377 +Train: [31] [1200/6250] eta: 0:12:24 lr: 0.000103 grad: 0.0694 (0.0733) loss: 0.8236 (0.8303) time: 0.1237 data: 0.0309 max mem: 9377 +Train: [31] [1300/6250] eta: 0:12:04 lr: 0.000103 grad: 0.0679 (0.0732) loss: 0.8307 (0.8302) time: 0.1405 data: 0.0521 max mem: 9377 +Train: [31] [1400/6250] eta: 0:11:47 lr: 0.000103 grad: 0.0753 (0.0734) loss: 0.8277 (0.8301) time: 0.1296 data: 0.0535 max mem: 9377 +Train: [31] [1500/6250] eta: 0:11:31 lr: 0.000103 grad: 0.0688 (0.0733) loss: 0.8332 (0.8300) time: 0.1501 data: 0.0670 max mem: 9377 +Train: [31] [1600/6250] eta: 0:11:15 lr: 0.000103 grad: 0.0733 (0.0735) loss: 0.8227 (0.8296) time: 0.1206 data: 0.0392 max mem: 9377 +Train: [31] [1700/6250] eta: 0:11:01 lr: 0.000103 grad: 0.0665 (0.0735) loss: 0.8283 (0.8295) time: 0.1399 data: 0.0543 max mem: 9377 +Train: [31] [1800/6250] eta: 0:10:47 lr: 0.000103 grad: 0.0705 (0.0736) loss: 0.8282 (0.8294) time: 0.1496 data: 0.0677 max mem: 9377 +Train: [31] [1900/6250] eta: 0:10:32 lr: 0.000103 grad: 0.0706 (0.0736) loss: 0.8255 (0.8292) time: 0.1526 data: 0.0751 max mem: 9377 +Train: [31] [2000/6250] eta: 0:10:19 lr: 0.000103 grad: 0.0722 (0.0736) loss: 0.8283 (0.8291) time: 0.1342 data: 0.0557 max mem: 9377 +Train: [31] [2100/6250] eta: 0:10:04 lr: 0.000103 grad: 0.0776 (0.0737) loss: 0.8208 (0.8289) time: 0.1487 data: 0.0596 max mem: 9377 +Train: [31] [2200/6250] eta: 0:09:50 lr: 0.000103 grad: 0.0766 (0.0738) loss: 0.8272 (0.8288) time: 0.1605 data: 0.0822 max mem: 9377 +Train: [31] [2300/6250] eta: 0:09:36 lr: 0.000103 grad: 0.0734 (0.0740) loss: 0.8280 (0.8286) time: 0.1677 data: 0.0922 max mem: 9377 +Train: [31] [2400/6250] eta: 0:09:22 lr: 0.000103 grad: 0.0742 (0.0740) loss: 0.8283 (0.8286) time: 0.1278 data: 0.0467 max mem: 9377 +Train: [31] [2500/6250] eta: 0:09:07 lr: 0.000103 grad: 0.0753 (0.0740) loss: 0.8234 (0.8285) time: 0.1401 data: 0.0552 max mem: 9377 +Train: [31] [2600/6250] eta: 0:08:51 lr: 0.000103 grad: 0.0743 (0.0740) loss: 0.8272 (0.8284) time: 0.1427 data: 0.0677 max mem: 9377 +Train: [31] [2700/6250] eta: 0:08:37 lr: 0.000103 grad: 0.0718 (0.0741) loss: 0.8244 (0.8283) time: 0.1461 data: 0.0614 max mem: 9377 +Train: [31] [2800/6250] eta: 0:08:23 lr: 0.000103 grad: 0.0714 (0.0743) loss: 0.8267 (0.8282) time: 0.1497 data: 0.0654 max mem: 9377 +Train: [31] [2900/6250] eta: 0:08:08 lr: 0.000103 grad: 0.0756 (0.0744) loss: 0.8204 (0.8280) time: 0.1479 data: 0.0616 max mem: 9377 +Train: [31] [3000/6250] eta: 0:07:55 lr: 0.000103 grad: 0.0730 (0.0746) loss: 0.8213 (0.8279) time: 0.1467 data: 0.0656 max mem: 9377 +Train: [31] [3100/6250] eta: 0:07:40 lr: 0.000103 grad: 0.0788 (0.0747) loss: 0.8225 (0.8277) time: 0.1544 data: 0.0720 max mem: 9377 +Train: [31] [3200/6250] eta: 0:07:25 lr: 0.000102 grad: 0.0747 (0.0748) loss: 0.8257 (0.8276) time: 0.1373 data: 0.0542 max mem: 9377 +Train: [31] [3300/6250] eta: 0:07:10 lr: 0.000102 grad: 0.0758 (0.0749) loss: 0.8214 (0.8275) time: 0.1490 data: 0.0673 max mem: 9377 +Train: [31] [3400/6250] eta: 0:06:56 lr: 0.000102 grad: 0.0736 (0.0750) loss: 0.8270 (0.8274) time: 0.1817 data: 0.1066 max mem: 9377 +Train: [31] [3500/6250] eta: 0:06:41 lr: 0.000102 grad: 0.0779 (0.0751) loss: 0.8262 (0.8273) time: 0.1450 data: 0.0667 max mem: 9377 +Train: [31] [3600/6250] eta: 0:06:26 lr: 0.000102 grad: 0.0796 (0.0752) loss: 0.8299 (0.8273) time: 0.1439 data: 0.0583 max mem: 9377 +Train: [31] [3700/6250] eta: 0:06:12 lr: 0.000102 grad: 0.0786 (0.0754) loss: 0.8220 (0.8272) time: 0.1310 data: 0.0472 max mem: 9377 +Train: [31] [3800/6250] eta: 0:05:57 lr: 0.000102 grad: 0.0760 (0.0754) loss: 0.8213 (0.8271) time: 0.1463 data: 0.0596 max mem: 9377 +Train: [31] [3900/6250] eta: 0:05:42 lr: 0.000102 grad: 0.0731 (0.0756) loss: 0.8261 (0.8269) time: 0.1390 data: 0.0545 max mem: 9377 +Train: [31] [4000/6250] eta: 0:05:28 lr: 0.000102 grad: 0.0875 (0.0757) loss: 0.8151 (0.8269) time: 0.1373 data: 0.0575 max mem: 9377 +Train: [31] [4100/6250] eta: 0:05:13 lr: 0.000102 grad: 0.0829 (0.0758) loss: 0.8191 (0.8268) time: 0.1373 data: 0.0491 max mem: 9377 +Train: [31] [4200/6250] eta: 0:04:58 lr: 0.000102 grad: 0.0756 (0.0759) loss: 0.8247 (0.8267) time: 0.1376 data: 0.0511 max mem: 9377 +Train: [31] [4300/6250] eta: 0:04:44 lr: 0.000102 grad: 0.0698 (0.0759) loss: 0.8265 (0.8267) time: 0.1581 data: 0.0763 max mem: 9377 +Train: [31] [4400/6250] eta: 0:04:29 lr: 0.000102 grad: 0.0829 (0.0761) loss: 0.8244 (0.8266) time: 0.1360 data: 0.0468 max mem: 9377 +Train: [31] [4500/6250] eta: 0:04:14 lr: 0.000102 grad: 0.0733 (0.0761) loss: 0.8245 (0.8265) time: 0.1573 data: 0.0746 max mem: 9377 +Train: [31] [4600/6250] eta: 0:04:00 lr: 0.000102 grad: 0.0775 (0.0762) loss: 0.8184 (0.8265) time: 0.1456 data: 0.0704 max mem: 9377 +Train: [31] [4700/6250] eta: 0:03:45 lr: 0.000102 grad: 0.0748 (0.0762) loss: 0.8282 (0.8265) time: 0.1116 data: 0.0301 max mem: 9377 +Train: [31] [4800/6250] eta: 0:03:30 lr: 0.000102 grad: 0.0706 (0.0762) loss: 0.8296 (0.8265) time: 0.1573 data: 0.0801 max mem: 9377 +Train: [31] [4900/6250] eta: 0:03:16 lr: 0.000102 grad: 0.0691 (0.0762) loss: 0.8238 (0.8264) time: 0.1405 data: 0.0635 max mem: 9377 +Train: [31] [5000/6250] eta: 0:03:01 lr: 0.000102 grad: 0.0720 (0.0762) loss: 0.8269 (0.8264) time: 0.1299 data: 0.0409 max mem: 9377 +Train: [31] [5100/6250] eta: 0:02:46 lr: 0.000102 grad: 0.0747 (0.0762) loss: 0.8206 (0.8263) time: 0.1243 data: 0.0440 max mem: 9377 +Train: [31] [5200/6250] eta: 0:02:32 lr: 0.000102 grad: 0.0773 (0.0762) loss: 0.8261 (0.8263) time: 0.1433 data: 0.0590 max mem: 9377 +Train: [31] [5300/6250] eta: 0:02:17 lr: 0.000102 grad: 0.0767 (0.0762) loss: 0.8238 (0.8262) time: 0.1579 data: 0.0843 max mem: 9377 +Train: [31] [5400/6250] eta: 0:02:03 lr: 0.000102 grad: 0.0736 (0.0762) loss: 0.8298 (0.8261) time: 0.1472 data: 0.0676 max mem: 9377 +Train: [31] [5500/6250] eta: 0:01:48 lr: 0.000102 grad: 0.0745 (0.0763) loss: 0.8245 (0.8261) time: 0.1873 data: 0.1020 max mem: 9377 +Train: [31] [5600/6250] eta: 0:01:34 lr: 0.000102 grad: 0.0775 (0.0763) loss: 0.8285 (0.8260) time: 0.1572 data: 0.0732 max mem: 9377 +Train: [31] [5700/6250] eta: 0:01:20 lr: 0.000102 grad: 0.0751 (0.0763) loss: 0.8221 (0.8260) time: 0.1551 data: 0.0702 max mem: 9377 +Train: [31] [5800/6250] eta: 0:01:05 lr: 0.000102 grad: 0.0747 (0.0763) loss: 0.8249 (0.8259) time: 0.1477 data: 0.0636 max mem: 9377 +Train: [31] [5900/6250] eta: 0:00:50 lr: 0.000102 grad: 0.0722 (0.0764) loss: 0.8243 (0.8259) time: 0.1367 data: 0.0563 max mem: 9377 +Train: [31] [6000/6250] eta: 0:00:36 lr: 0.000102 grad: 0.0774 (0.0764) loss: 0.8189 (0.8259) time: 0.1447 data: 0.0664 max mem: 9377 +Train: [31] [6100/6250] eta: 0:00:21 lr: 0.000102 grad: 0.0690 (0.0763) loss: 0.8253 (0.8258) time: 0.1598 data: 0.0811 max mem: 9377 +Train: [31] [6200/6250] eta: 0:00:07 lr: 0.000102 grad: 0.0775 (0.0764) loss: 0.8288 (0.8258) time: 0.1335 data: 0.0531 max mem: 9377 +Train: [31] [6249/6250] eta: 0:00:00 lr: 0.000102 grad: 0.0762 (0.0764) loss: 0.8192 (0.8258) time: 0.1315 data: 0.0472 max mem: 9377 +Train: [31] Total time: 0:15:13 (0.1462 s / it) +Averaged stats: lr: 0.000102 grad: 0.0762 (0.0764) loss: 0.8192 (0.8258) +Eval (hcp-train-subset): [31] [ 0/62] eta: 0:04:44 loss: 0.8375 (0.8375) time: 4.5887 data: 4.5579 max mem: 9377 +Eval (hcp-train-subset): [31] [61/62] eta: 0:00:00 loss: 0.8385 (0.8366) time: 0.1159 data: 0.0892 max mem: 9377 +Eval (hcp-train-subset): [31] Total time: 0:00:12 (0.2060 s / it) +Averaged stats (hcp-train-subset): loss: 0.8385 (0.8366) +Eval (hcp-val): [31] [ 0/62] eta: 0:03:35 loss: 0.8364 (0.8364) time: 3.4728 data: 3.3916 max mem: 9377 +Eval (hcp-val): [31] [61/62] eta: 0:00:00 loss: 0.8364 (0.8388) time: 0.1350 data: 0.1097 max mem: 9377 +Eval (hcp-val): [31] Total time: 0:00:13 (0.2113 s / it) +Averaged stats (hcp-val): loss: 0.8364 (0.8388) +Eval (nsd-val): [31] [ 0/62] eta: 0:03:51 loss: 0.8038 (0.8038) time: 3.7270 data: 3.6716 max mem: 9377 +Eval (nsd-val): [31] [61/62] eta: 0:00:00 loss: 0.8134 (0.8130) time: 0.1068 data: 0.0817 max mem: 9377 +Eval (nsd-val): [31] Total time: 0:00:12 (0.1982 s / it) +Averaged stats (nsd-val): loss: 0.8134 (0.8130) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +Train: [32] [ 0/6250] eta: 7:46:53 lr: 0.000102 grad: 0.0940 (0.0940) loss: 0.8414 (0.8414) time: 4.4821 data: 4.1715 max mem: 9377 +Train: [32] [ 100/6250] eta: 0:19:55 lr: 0.000102 grad: 0.0693 (0.0758) loss: 0.8383 (0.8399) time: 0.1462 data: 0.0499 max mem: 9377 +Train: [32] [ 200/6250] eta: 0:17:41 lr: 0.000102 grad: 0.0702 (0.0736) loss: 0.8384 (0.8383) time: 0.1541 data: 0.0681 max mem: 9377 +Train: [32] [ 300/6250] eta: 0:16:36 lr: 0.000102 grad: 0.0725 (0.0734) loss: 0.8339 (0.8371) time: 0.1545 data: 0.0654 max mem: 9377 +Train: [32] [ 400/6250] eta: 0:15:49 lr: 0.000102 grad: 0.0720 (0.0737) loss: 0.8343 (0.8352) time: 0.1422 data: 0.0547 max mem: 9377 +Train: [32] [ 500/6250] eta: 0:15:11 lr: 0.000102 grad: 0.0730 (0.0746) loss: 0.8353 (0.8337) time: 0.1484 data: 0.0606 max mem: 9377 +Train: [32] [ 600/6250] eta: 0:14:42 lr: 0.000102 grad: 0.0733 (0.0750) loss: 0.8323 (0.8323) time: 0.1433 data: 0.0586 max mem: 9377 +Train: [32] [ 700/6250] eta: 0:14:16 lr: 0.000102 grad: 0.0725 (0.0752) loss: 0.8295 (0.8314) time: 0.1218 data: 0.0289 max mem: 9377 +Train: [32] [ 800/6250] eta: 0:13:47 lr: 0.000101 grad: 0.0720 (0.0754) loss: 0.8275 (0.8307) time: 0.1169 data: 0.0277 max mem: 9377 +Train: [32] [ 900/6250] eta: 0:13:23 lr: 0.000101 grad: 0.0764 (0.0756) loss: 0.8210 (0.8299) time: 0.1216 data: 0.0338 max mem: 9377 +Train: [32] [1000/6250] eta: 0:13:00 lr: 0.000101 grad: 0.0699 (0.0756) loss: 0.8238 (0.8294) time: 0.1388 data: 0.0530 max mem: 9377 +Train: [32] [1100/6250] eta: 0:12:43 lr: 0.000101 grad: 0.0748 (0.0755) loss: 0.8190 (0.8290) time: 0.1572 data: 0.0734 max mem: 9377 +Train: [32] [1200/6250] eta: 0:12:27 lr: 0.000101 grad: 0.0738 (0.0755) loss: 0.8244 (0.8285) time: 0.1343 data: 0.0496 max mem: 9377 +Train: [32] [1300/6250] eta: 0:12:11 lr: 0.000101 grad: 0.0745 (0.0757) loss: 0.8193 (0.8279) time: 0.1286 data: 0.0432 max mem: 9377 +Train: [32] [1400/6250] eta: 0:12:04 lr: 0.000101 grad: 0.0793 (0.0762) loss: 0.8226 (0.8275) time: 0.1775 data: 0.0967 max mem: 9377 +Train: [32] [1500/6250] eta: 0:11:50 lr: 0.000101 grad: 0.0712 (0.0765) loss: 0.8198 (0.8271) time: 0.1761 data: 0.0988 max mem: 9377 +Train: [32] [1600/6250] eta: 0:11:37 lr: 0.000101 grad: 0.0750 (0.0765) loss: 0.8277 (0.8268) time: 0.1246 data: 0.0476 max mem: 9377 +Train: [32] [1700/6250] eta: 0:11:25 lr: 0.000101 grad: 0.0740 (0.0766) loss: 0.8262 (0.8267) time: 0.1900 data: 0.1049 max mem: 9377 +Train: [32] [1800/6250] eta: 0:11:09 lr: 0.000101 grad: 0.0803 (0.0768) loss: 0.8242 (0.8264) time: 0.1605 data: 0.0765 max mem: 9377 +Train: [32] [1900/6250] eta: 0:10:58 lr: 0.000101 grad: 0.0775 (0.0767) loss: 0.8243 (0.8264) time: 0.1583 data: 0.0778 max mem: 9377 +Train: [32] [2000/6250] eta: 0:10:43 lr: 0.000101 grad: 0.0731 (0.0767) loss: 0.8248 (0.8263) time: 0.1642 data: 0.0808 max mem: 9377 +Train: [32] [2100/6250] eta: 0:10:26 lr: 0.000101 grad: 0.0717 (0.0768) loss: 0.8240 (0.8261) time: 0.1424 data: 0.0613 max mem: 9377 +Train: [32] [2200/6250] eta: 0:10:09 lr: 0.000101 grad: 0.0772 (0.0768) loss: 0.8294 (0.8259) time: 0.1400 data: 0.0582 max mem: 9377 +Train: [32] [2300/6250] eta: 0:09:51 lr: 0.000101 grad: 0.0703 (0.0767) loss: 0.8241 (0.8258) time: 0.1349 data: 0.0542 max mem: 9377 +Train: [32] [2400/6250] eta: 0:09:36 lr: 0.000101 grad: 0.0755 (0.0767) loss: 0.8273 (0.8258) time: 0.1380 data: 0.0591 max mem: 9377 +Train: [32] [2500/6250] eta: 0:09:19 lr: 0.000101 grad: 0.0755 (0.0768) loss: 0.8271 (0.8257) time: 0.1403 data: 0.0559 max mem: 9377 +Train: [32] [2600/6250] eta: 0:09:03 lr: 0.000101 grad: 0.0850 (0.0768) loss: 0.8216 (0.8256) time: 0.1397 data: 0.0520 max mem: 9377 +Train: [32] [2700/6250] eta: 0:08:47 lr: 0.000101 grad: 0.0730 (0.0769) loss: 0.8266 (0.8256) time: 0.1574 data: 0.0702 max mem: 9377 +Train: [32] [2800/6250] eta: 0:08:32 lr: 0.000101 grad: 0.0771 (0.0768) loss: 0.8250 (0.8256) time: 0.1213 data: 0.0395 max mem: 9377 +Train: [32] [2900/6250] eta: 0:08:17 lr: 0.000101 grad: 0.0795 (0.0768) loss: 0.8202 (0.8257) time: 0.1255 data: 0.0366 max mem: 9377 +Train: [32] [3000/6250] eta: 0:08:02 lr: 0.000101 grad: 0.0723 (0.0769) loss: 0.8249 (0.8256) time: 0.1331 data: 0.0487 max mem: 9377 +Train: [32] [3100/6250] eta: 0:07:47 lr: 0.000101 grad: 0.0710 (0.0770) loss: 0.8216 (0.8256) time: 0.1555 data: 0.0781 max mem: 9377 +Train: [32] [3200/6250] eta: 0:07:32 lr: 0.000101 grad: 0.0793 (0.0770) loss: 0.8146 (0.8255) time: 0.1519 data: 0.0698 max mem: 9377 +Train: [32] [3300/6250] eta: 0:07:17 lr: 0.000101 grad: 0.0788 (0.0771) loss: 0.8207 (0.8254) time: 0.1454 data: 0.0545 max mem: 9377 +Train: [32] [3400/6250] eta: 0:07:02 lr: 0.000101 grad: 0.0821 (0.0771) loss: 0.8212 (0.8254) time: 0.1272 data: 0.0422 max mem: 9377 +Train: [32] [3500/6250] eta: 0:06:47 lr: 0.000101 grad: 0.0732 (0.0771) loss: 0.8285 (0.8254) time: 0.1476 data: 0.0614 max mem: 9377 +Train: [32] [3600/6250] eta: 0:06:32 lr: 0.000101 grad: 0.0778 (0.0772) loss: 0.8232 (0.8253) time: 0.1649 data: 0.0827 max mem: 9377 +Train: [32] [3700/6250] eta: 0:06:17 lr: 0.000101 grad: 0.0781 (0.0773) loss: 0.8251 (0.8252) time: 0.1158 data: 0.0236 max mem: 9377 +Train: [32] [3800/6250] eta: 0:06:02 lr: 0.000101 grad: 0.0735 (0.0773) loss: 0.8192 (0.8252) time: 0.1261 data: 0.0419 max mem: 9377 +Train: [32] [3900/6250] eta: 0:05:47 lr: 0.000101 grad: 0.0758 (0.0773) loss: 0.8226 (0.8253) time: 0.1535 data: 0.0753 max mem: 9377 +Train: [32] [4000/6250] eta: 0:05:32 lr: 0.000101 grad: 0.0773 (0.0773) loss: 0.8190 (0.8252) time: 0.1587 data: 0.0802 max mem: 9377 +Train: [32] [4100/6250] eta: 0:05:17 lr: 0.000101 grad: 0.0696 (0.0773) loss: 0.8258 (0.8252) time: 0.1483 data: 0.0678 max mem: 9377 +Train: [32] [4200/6250] eta: 0:05:02 lr: 0.000101 grad: 0.0704 (0.0772) loss: 0.8266 (0.8252) time: 0.1530 data: 0.0713 max mem: 9377 +Train: [32] [4300/6250] eta: 0:04:47 lr: 0.000101 grad: 0.0727 (0.0771) loss: 0.8314 (0.8252) time: 0.1516 data: 0.0670 max mem: 9377 +Train: [32] [4400/6250] eta: 0:04:33 lr: 0.000101 grad: 0.0725 (0.0771) loss: 0.8265 (0.8252) time: 0.1338 data: 0.0527 max mem: 9377 +Train: [32] [4500/6250] eta: 0:04:17 lr: 0.000101 grad: 0.0743 (0.0770) loss: 0.8319 (0.8253) time: 0.1426 data: 0.0583 max mem: 9377 +Train: [32] [4600/6250] eta: 0:04:03 lr: 0.000101 grad: 0.0758 (0.0770) loss: 0.8306 (0.8253) time: 0.1550 data: 0.0764 max mem: 9377 +Train: [32] [4700/6250] eta: 0:03:48 lr: 0.000100 grad: 0.0675 (0.0769) loss: 0.8338 (0.8254) time: 0.1405 data: 0.0557 max mem: 9377 +Train: [32] [4800/6250] eta: 0:03:33 lr: 0.000100 grad: 0.0728 (0.0769) loss: 0.8309 (0.8254) time: 0.1327 data: 0.0550 max mem: 9377 +Train: [32] [4900/6250] eta: 0:03:18 lr: 0.000100 grad: 0.0752 (0.0769) loss: 0.8272 (0.8254) time: 0.1559 data: 0.0743 max mem: 9377 +Train: [32] [5000/6250] eta: 0:03:03 lr: 0.000100 grad: 0.0717 (0.0768) loss: 0.8311 (0.8255) time: 0.1429 data: 0.0596 max mem: 9377 +Train: [32] [5100/6250] eta: 0:02:48 lr: 0.000100 grad: 0.0690 (0.0768) loss: 0.8319 (0.8255) time: 0.1446 data: 0.0627 max mem: 9377 +Train: [32] [5200/6250] eta: 0:02:34 lr: 0.000100 grad: 0.0750 (0.0768) loss: 0.8305 (0.8256) time: 0.1836 data: 0.0957 max mem: 9377 +Train: [32] [5300/6250] eta: 0:02:20 lr: 0.000100 grad: 0.0736 (0.0767) loss: 0.8295 (0.8257) time: 0.1589 data: 0.0839 max mem: 9377 +Train: [32] [5400/6250] eta: 0:02:05 lr: 0.000100 grad: 0.0708 (0.0767) loss: 0.8320 (0.8257) time: 0.1471 data: 0.0662 max mem: 9377 +Train: [32] [5500/6250] eta: 0:01:50 lr: 0.000100 grad: 0.0747 (0.0767) loss: 0.8288 (0.8257) time: 0.1551 data: 0.0717 max mem: 9377 +Train: [32] [5600/6250] eta: 0:01:36 lr: 0.000100 grad: 0.0819 (0.0768) loss: 0.8316 (0.8257) time: 0.1416 data: 0.0500 max mem: 9377 +Train: [32] [5700/6250] eta: 0:01:21 lr: 0.000100 grad: 0.0702 (0.0768) loss: 0.8268 (0.8258) time: 0.1236 data: 0.0429 max mem: 9377 +Train: [32] [5800/6250] eta: 0:01:06 lr: 0.000100 grad: 0.0697 (0.0768) loss: 0.8298 (0.8258) time: 0.1470 data: 0.0595 max mem: 9377 +Train: [32] [5900/6250] eta: 0:00:51 lr: 0.000100 grad: 0.0730 (0.0768) loss: 0.8273 (0.8259) time: 0.1515 data: 0.0666 max mem: 9377 +Train: [32] [6000/6250] eta: 0:00:36 lr: 0.000100 grad: 0.0736 (0.0768) loss: 0.8274 (0.8259) time: 0.1420 data: 0.0624 max mem: 9377 +Train: [32] [6100/6250] eta: 0:00:22 lr: 0.000100 grad: 0.0711 (0.0767) loss: 0.8263 (0.8259) time: 0.1201 data: 0.0265 max mem: 9377 +Train: [32] [6200/6250] eta: 0:00:07 lr: 0.000100 grad: 0.0699 (0.0767) loss: 0.8280 (0.8260) time: 0.1481 data: 0.0611 max mem: 9377 +Train: [32] [6249/6250] eta: 0:00:00 lr: 0.000100 grad: 0.0667 (0.0767) loss: 0.8285 (0.8260) time: 0.1342 data: 0.0509 max mem: 9377 +Train: [32] Total time: 0:15:24 (0.1479 s / it) +Averaged stats: lr: 0.000100 grad: 0.0667 (0.0767) loss: 0.8285 (0.8260) +Eval (hcp-train-subset): [32] [ 0/62] eta: 0:05:24 loss: 0.8372 (0.8372) time: 5.2390 data: 5.2086 max mem: 9377 +Eval (hcp-train-subset): [32] [61/62] eta: 0:00:00 loss: 0.8344 (0.8374) time: 0.1375 data: 0.1118 max mem: 9377 +Eval (hcp-train-subset): [32] Total time: 0:00:13 (0.2236 s / it) +Averaged stats (hcp-train-subset): loss: 0.8344 (0.8374) +Eval (hcp-val): [32] [ 0/62] eta: 0:03:25 loss: 0.8374 (0.8374) time: 3.3066 data: 3.2288 max mem: 9377 +Eval (hcp-val): [32] [61/62] eta: 0:00:00 loss: 0.8379 (0.8389) time: 0.1265 data: 0.0987 max mem: 9377 +Eval (hcp-val): [32] Total time: 0:00:13 (0.2123 s / it) +Averaged stats (hcp-val): loss: 0.8379 (0.8389) +Eval (nsd-val): [32] [ 0/62] eta: 0:04:33 loss: 0.8037 (0.8037) time: 4.4123 data: 4.3820 max mem: 9377 +Eval (nsd-val): [32] [61/62] eta: 0:00:00 loss: 0.8123 (0.8146) time: 0.1313 data: 0.1036 max mem: 9377 +Eval (nsd-val): [32] Total time: 0:00:13 (0.2141 s / it) +Averaged stats (nsd-val): loss: 0.8123 (0.8146) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +Train: [33] [ 0/6250] eta: 7:54:39 lr: 0.000100 grad: 0.0790 (0.0790) loss: 0.8490 (0.8490) time: 4.5567 data: 4.3280 max mem: 9377 +Train: [33] [ 100/6250] eta: 0:20:10 lr: 0.000100 grad: 0.0720 (0.0744) loss: 0.8428 (0.8420) time: 0.1561 data: 0.0680 max mem: 9377 +Train: [33] [ 200/6250] eta: 0:17:00 lr: 0.000100 grad: 0.0738 (0.0772) loss: 0.8297 (0.8379) time: 0.1490 data: 0.0667 max mem: 9377 +Train: [33] [ 300/6250] eta: 0:15:52 lr: 0.000100 grad: 0.0691 (0.0792) loss: 0.8336 (0.8354) time: 0.1417 data: 0.0563 max mem: 9377 +Train: [33] [ 400/6250] eta: 0:15:12 lr: 0.000100 grad: 0.0750 (0.0788) loss: 0.8296 (0.8341) time: 0.1465 data: 0.0674 max mem: 9377 +Train: [33] [ 500/6250] eta: 0:14:49 lr: 0.000100 grad: 0.0718 (0.0790) loss: 0.8329 (0.8332) time: 0.1503 data: 0.0640 max mem: 9377 +Train: [33] [ 600/6250] eta: 0:14:15 lr: 0.000100 grad: 0.0691 (0.0791) loss: 0.8301 (0.8320) time: 0.1251 data: 0.0359 max mem: 9377 +Train: [33] [ 700/6250] eta: 0:13:52 lr: 0.000100 grad: 0.0707 (0.0784) loss: 0.8318 (0.8313) time: 0.1524 data: 0.0676 max mem: 9377 +Train: [33] [ 800/6250] eta: 0:13:27 lr: 0.000100 grad: 0.0654 (0.0777) loss: 0.8336 (0.8311) time: 0.1528 data: 0.0648 max mem: 9377 +Train: [33] [ 900/6250] eta: 0:13:12 lr: 0.000100 grad: 0.0634 (0.0771) loss: 0.8313 (0.8308) time: 0.1617 data: 0.0748 max mem: 9377 +Train: [33] [1000/6250] eta: 0:12:59 lr: 0.000100 grad: 0.0634 (0.0764) loss: 0.8358 (0.8309) time: 0.1457 data: 0.0643 max mem: 9377 +Train: [33] [1100/6250] eta: 0:12:44 lr: 0.000100 grad: 0.0755 (0.0762) loss: 0.8287 (0.8307) time: 0.1598 data: 0.0800 max mem: 9377 +Train: [33] [1200/6250] eta: 0:12:30 lr: 0.000100 grad: 0.0668 (0.0757) loss: 0.8327 (0.8305) time: 0.1503 data: 0.0653 max mem: 9377 +Train: [33] [1300/6250] eta: 0:12:18 lr: 0.000100 grad: 0.0679 (0.0754) loss: 0.8294 (0.8304) time: 0.1369 data: 0.0504 max mem: 9377 +Train: [33] [1400/6250] eta: 0:12:06 lr: 0.000100 grad: 0.0733 (0.0752) loss: 0.8343 (0.8303) time: 0.1612 data: 0.0781 max mem: 9377 +Train: [33] [1500/6250] eta: 0:11:52 lr: 0.000100 grad: 0.0686 (0.0751) loss: 0.8244 (0.8301) time: 0.1523 data: 0.0740 max mem: 9377 +Train: [33] [1600/6250] eta: 0:11:36 lr: 0.000100 grad: 0.0662 (0.0750) loss: 0.8259 (0.8301) time: 0.1362 data: 0.0506 max mem: 9377 +Train: [33] [1700/6250] eta: 0:11:21 lr: 0.000100 grad: 0.0657 (0.0748) loss: 0.8295 (0.8301) time: 0.1581 data: 0.0768 max mem: 9377 +Train: [33] [1800/6250] eta: 0:11:07 lr: 0.000100 grad: 0.0706 (0.0745) loss: 0.8353 (0.8302) time: 0.1591 data: 0.0810 max mem: 9377 +Train: [33] [1900/6250] eta: 0:10:53 lr: 0.000100 grad: 0.0700 (0.0744) loss: 0.8266 (0.8302) time: 0.1467 data: 0.0633 max mem: 9377 +Train: [33] [2000/6250] eta: 0:10:35 lr: 0.000100 grad: 0.0699 (0.0743) loss: 0.8331 (0.8304) time: 0.1263 data: 0.0365 max mem: 9377 +Train: [33] [2100/6250] eta: 0:10:18 lr: 0.000100 grad: 0.0708 (0.0743) loss: 0.8322 (0.8304) time: 0.1411 data: 0.0606 max mem: 9377 +Train: [33] [2200/6250] eta: 0:10:02 lr: 0.000099 grad: 0.0691 (0.0744) loss: 0.8288 (0.8303) time: 0.1419 data: 0.0584 max mem: 9377 +Train: [33] [2300/6250] eta: 0:09:46 lr: 0.000099 grad: 0.0682 (0.0744) loss: 0.8359 (0.8303) time: 0.1324 data: 0.0466 max mem: 9377 +Train: [33] [2400/6250] eta: 0:09:31 lr: 0.000099 grad: 0.0738 (0.0745) loss: 0.8332 (0.8303) time: 0.1489 data: 0.0632 max mem: 9377 +Train: [33] [2500/6250] eta: 0:09:16 lr: 0.000099 grad: 0.0692 (0.0745) loss: 0.8324 (0.8303) time: 0.1505 data: 0.0714 max mem: 9377 +Train: [33] [2600/6250] eta: 0:09:01 lr: 0.000099 grad: 0.0717 (0.0745) loss: 0.8235 (0.8302) time: 0.1522 data: 0.0709 max mem: 9377 +Train: [33] [2700/6250] eta: 0:08:46 lr: 0.000099 grad: 0.0742 (0.0745) loss: 0.8282 (0.8301) time: 0.1473 data: 0.0665 max mem: 9377 +Train: [33] [2800/6250] eta: 0:08:31 lr: 0.000099 grad: 0.0704 (0.0746) loss: 0.8299 (0.8300) time: 0.1600 data: 0.0780 max mem: 9377 +Train: [33] [2900/6250] eta: 0:08:16 lr: 0.000099 grad: 0.0737 (0.0747) loss: 0.8223 (0.8298) time: 0.1368 data: 0.0548 max mem: 9377 +Train: [33] [3000/6250] eta: 0:08:00 lr: 0.000099 grad: 0.0747 (0.0748) loss: 0.8285 (0.8298) time: 0.1350 data: 0.0549 max mem: 9377 +Train: [33] [3100/6250] eta: 0:07:45 lr: 0.000099 grad: 0.0762 (0.0749) loss: 0.8277 (0.8296) time: 0.1457 data: 0.0593 max mem: 9377 +Train: [33] [3200/6250] eta: 0:07:30 lr: 0.000099 grad: 0.0707 (0.0750) loss: 0.8317 (0.8295) time: 0.1665 data: 0.0854 max mem: 9377 +Train: [33] [3300/6250] eta: 0:07:15 lr: 0.000099 grad: 0.0711 (0.0751) loss: 0.8259 (0.8295) time: 0.1314 data: 0.0486 max mem: 9377 +Train: [33] [3400/6250] eta: 0:06:59 lr: 0.000099 grad: 0.0774 (0.0751) loss: 0.8319 (0.8294) time: 0.1263 data: 0.0450 max mem: 9377 +Train: [33] [3500/6250] eta: 0:06:44 lr: 0.000099 grad: 0.0727 (0.0751) loss: 0.8320 (0.8295) time: 0.1525 data: 0.0689 max mem: 9377 +Train: [33] [3600/6250] eta: 0:06:30 lr: 0.000099 grad: 0.0742 (0.0751) loss: 0.8255 (0.8294) time: 0.1764 data: 0.0901 max mem: 9377 +Train: [33] [3700/6250] eta: 0:06:14 lr: 0.000099 grad: 0.0734 (0.0750) loss: 0.8302 (0.8294) time: 0.1332 data: 0.0533 max mem: 9377 +Train: [33] [3800/6250] eta: 0:05:59 lr: 0.000099 grad: 0.0751 (0.0751) loss: 0.8255 (0.8294) time: 0.1361 data: 0.0551 max mem: 9377 +Train: [33] [3900/6250] eta: 0:05:45 lr: 0.000099 grad: 0.0793 (0.0751) loss: 0.8262 (0.8295) time: 0.1353 data: 0.0543 max mem: 9377 +Train: [33] [4000/6250] eta: 0:05:30 lr: 0.000099 grad: 0.0713 (0.0751) loss: 0.8332 (0.8295) time: 0.1124 data: 0.0331 max mem: 9377 +Train: [33] [4100/6250] eta: 0:05:15 lr: 0.000099 grad: 0.0739 (0.0751) loss: 0.8318 (0.8295) time: 0.1500 data: 0.0691 max mem: 9377 +Train: [33] [4200/6250] eta: 0:05:00 lr: 0.000099 grad: 0.0701 (0.0751) loss: 0.8331 (0.8295) time: 0.1374 data: 0.0516 max mem: 9377 +Train: [33] [4300/6250] eta: 0:04:45 lr: 0.000099 grad: 0.0734 (0.0751) loss: 0.8317 (0.8296) time: 0.1408 data: 0.0563 max mem: 9377 +Train: [33] [4400/6250] eta: 0:04:31 lr: 0.000099 grad: 0.0692 (0.0751) loss: 0.8300 (0.8296) time: 0.1578 data: 0.0772 max mem: 9377 +Train: [33] [4500/6250] eta: 0:04:16 lr: 0.000099 grad: 0.0709 (0.0751) loss: 0.8310 (0.8296) time: 0.1127 data: 0.0322 max mem: 9377 +Train: [33] [4600/6250] eta: 0:04:01 lr: 0.000099 grad: 0.0679 (0.0750) loss: 0.8333 (0.8297) time: 0.1440 data: 0.0629 max mem: 9377 +Train: [33] [4700/6250] eta: 0:03:46 lr: 0.000099 grad: 0.0711 (0.0750) loss: 0.8328 (0.8297) time: 0.1259 data: 0.0404 max mem: 9377 +Train: [33] [4800/6250] eta: 0:03:31 lr: 0.000099 grad: 0.0711 (0.0750) loss: 0.8353 (0.8297) time: 0.1621 data: 0.0784 max mem: 9377 +Train: [33] [4900/6250] eta: 0:03:17 lr: 0.000099 grad: 0.0729 (0.0750) loss: 0.8297 (0.8297) time: 0.1395 data: 0.0611 max mem: 9377 +Train: [33] [5000/6250] eta: 0:03:03 lr: 0.000099 grad: 0.0719 (0.0750) loss: 0.8257 (0.8297) time: 0.1429 data: 0.0575 max mem: 9377 +Train: [33] [5100/6250] eta: 0:02:48 lr: 0.000099 grad: 0.0731 (0.0750) loss: 0.8309 (0.8297) time: 0.1555 data: 0.0719 max mem: 9377 +Train: [33] [5200/6250] eta: 0:02:34 lr: 0.000099 grad: 0.0727 (0.0751) loss: 0.8281 (0.8297) time: 0.1591 data: 0.0828 max mem: 9377 +Train: [33] [5300/6250] eta: 0:02:19 lr: 0.000099 grad: 0.0727 (0.0751) loss: 0.8305 (0.8296) time: 0.1329 data: 0.0558 max mem: 9377 +Train: [33] [5400/6250] eta: 0:02:04 lr: 0.000099 grad: 0.0749 (0.0752) loss: 0.8307 (0.8297) time: 0.1600 data: 0.0871 max mem: 9377 +Train: [33] [5500/6250] eta: 0:01:50 lr: 0.000099 grad: 0.0703 (0.0751) loss: 0.8379 (0.8297) time: 0.1532 data: 0.0746 max mem: 9377 +Train: [33] [5600/6250] eta: 0:01:35 lr: 0.000099 grad: 0.0709 (0.0752) loss: 0.8357 (0.8297) time: 0.1606 data: 0.0777 max mem: 9377 +Train: [33] [5700/6250] eta: 0:01:20 lr: 0.000099 grad: 0.0699 (0.0752) loss: 0.8339 (0.8297) time: 0.1572 data: 0.0743 max mem: 9377 +Train: [33] [5800/6250] eta: 0:01:06 lr: 0.000099 grad: 0.0687 (0.0752) loss: 0.8346 (0.8297) time: 0.1253 data: 0.0366 max mem: 9377 +Train: [33] [5900/6250] eta: 0:00:51 lr: 0.000098 grad: 0.0717 (0.0752) loss: 0.8348 (0.8298) time: 0.1409 data: 0.0496 max mem: 9377 +Train: [33] [6000/6250] eta: 0:00:36 lr: 0.000098 grad: 0.0704 (0.0752) loss: 0.8284 (0.8297) time: 0.1511 data: 0.0627 max mem: 9377 +Train: [33] [6100/6250] eta: 0:00:21 lr: 0.000098 grad: 0.0728 (0.0752) loss: 0.8328 (0.8297) time: 0.1270 data: 0.0413 max mem: 9377 +Train: [33] [6200/6250] eta: 0:00:07 lr: 0.000098 grad: 0.0733 (0.0752) loss: 0.8328 (0.8298) time: 0.1441 data: 0.0523 max mem: 9377 +Train: [33] [6249/6250] eta: 0:00:00 lr: 0.000098 grad: 0.0766 (0.0751) loss: 0.8316 (0.8298) time: 0.1414 data: 0.0562 max mem: 9377 +Train: [33] Total time: 0:15:19 (0.1472 s / it) +Averaged stats: lr: 0.000098 grad: 0.0766 (0.0751) loss: 0.8316 (0.8298) +Eval (hcp-train-subset): [33] [ 0/62] eta: 0:04:59 loss: 0.8374 (0.8374) time: 4.8276 data: 4.7980 max mem: 9377 +Eval (hcp-train-subset): [33] [61/62] eta: 0:00:00 loss: 0.8344 (0.8365) time: 0.1117 data: 0.0850 max mem: 9377 +Eval (hcp-train-subset): [33] Total time: 0:00:12 (0.2078 s / it) +Averaged stats (hcp-train-subset): loss: 0.8344 (0.8365) +Eval (hcp-val): [33] [ 0/62] eta: 0:04:47 loss: 0.8396 (0.8396) time: 4.6327 data: 4.6031 max mem: 9377 +Eval (hcp-val): [33] [61/62] eta: 0:00:00 loss: 0.8403 (0.8388) time: 0.1571 data: 0.1313 max mem: 9377 +Eval (hcp-val): [33] Total time: 0:00:14 (0.2398 s / it) +Averaged stats (hcp-val): loss: 0.8403 (0.8388) +Eval (nsd-val): [33] [ 0/62] eta: 0:04:53 loss: 0.8052 (0.8052) time: 4.7331 data: 4.7026 max mem: 9377 +Eval (nsd-val): [33] [61/62] eta: 0:00:00 loss: 0.8127 (0.8133) time: 0.1375 data: 0.1120 max mem: 9377 +Eval (nsd-val): [33] Total time: 0:00:13 (0.2159 s / it) +Averaged stats (nsd-val): loss: 0.8127 (0.8133) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +Train: [34] [ 0/6250] eta: 6:54:59 lr: 0.000098 grad: 0.0591 (0.0591) loss: 0.8359 (0.8359) time: 3.9839 data: 3.7232 max mem: 9377 +Train: [34] [ 100/6250] eta: 0:19:50 lr: 0.000098 grad: 0.0793 (0.0962) loss: 0.8262 (0.8220) time: 0.1358 data: 0.0429 max mem: 9377 +Train: [34] [ 200/6250] eta: 0:17:17 lr: 0.000098 grad: 0.0837 (0.0917) loss: 0.8112 (0.8196) time: 0.1406 data: 0.0551 max mem: 9377 +Train: [34] [ 300/6250] eta: 0:16:15 lr: 0.000098 grad: 0.0756 (0.0876) loss: 0.8205 (0.8211) time: 0.1566 data: 0.0626 max mem: 9377 +Train: [34] [ 400/6250] eta: 0:15:28 lr: 0.000098 grad: 0.0695 (0.0849) loss: 0.8281 (0.8222) time: 0.1508 data: 0.0581 max mem: 9377 +Train: [34] [ 500/6250] eta: 0:14:57 lr: 0.000098 grad: 0.0709 (0.0849) loss: 0.8217 (0.8226) time: 0.1440 data: 0.0597 max mem: 9377 +Train: [34] [ 600/6250] eta: 0:14:33 lr: 0.000098 grad: 0.0725 (0.0835) loss: 0.8229 (0.8228) time: 0.1521 data: 0.0609 max mem: 9377 +Train: [34] [ 700/6250] eta: 0:14:04 lr: 0.000098 grad: 0.0678 (0.0823) loss: 0.8287 (0.8234) time: 0.1366 data: 0.0527 max mem: 9377 +Train: [34] [ 800/6250] eta: 0:13:38 lr: 0.000098 grad: 0.0732 (0.0815) loss: 0.8263 (0.8238) time: 0.1486 data: 0.0597 max mem: 9377 +Train: [34] [ 900/6250] eta: 0:13:12 lr: 0.000098 grad: 0.0738 (0.0805) loss: 0.8259 (0.8243) time: 0.1348 data: 0.0424 max mem: 9377 +Train: [34] [1000/6250] eta: 0:13:02 lr: 0.000098 grad: 0.0703 (0.0795) loss: 0.8257 (0.8250) time: 0.1784 data: 0.0997 max mem: 9377 +Train: [34] [1100/6250] eta: 0:12:47 lr: 0.000098 grad: 0.0714 (0.0787) loss: 0.8333 (0.8256) time: 0.1405 data: 0.0592 max mem: 9377 +Train: [34] [1200/6250] eta: 0:12:34 lr: 0.000098 grad: 0.0720 (0.0784) loss: 0.8249 (0.8257) time: 0.1512 data: 0.0613 max mem: 9377 +Train: [34] [1300/6250] eta: 0:12:23 lr: 0.000098 grad: 0.0703 (0.0779) loss: 0.8291 (0.8259) time: 0.1594 data: 0.0665 max mem: 9377 +Train: [34] [1400/6250] eta: 0:12:12 lr: 0.000098 grad: 0.0715 (0.0776) loss: 0.8222 (0.8258) time: 0.1632 data: 0.0835 max mem: 9377 +Train: [34] [1500/6250] eta: 0:11:56 lr: 0.000098 grad: 0.0700 (0.0774) loss: 0.8305 (0.8258) time: 0.1572 data: 0.0782 max mem: 9377 +Train: [34] [1600/6250] eta: 0:11:38 lr: 0.000098 grad: 0.0712 (0.0772) loss: 0.8261 (0.8259) time: 0.1392 data: 0.0392 max mem: 9377 +Train: [34] [1700/6250] eta: 0:11:23 lr: 0.000098 grad: 0.0733 (0.0770) loss: 0.8338 (0.8261) time: 0.1450 data: 0.0595 max mem: 9377 +Train: [34] [1800/6250] eta: 0:11:08 lr: 0.000098 grad: 0.0775 (0.0769) loss: 0.8211 (0.8262) time: 0.1555 data: 0.0742 max mem: 9377 +Train: [34] [1900/6250] eta: 0:10:50 lr: 0.000098 grad: 0.0747 (0.0770) loss: 0.8272 (0.8262) time: 0.1496 data: 0.0638 max mem: 9377 +Train: [34] [2000/6250] eta: 0:10:34 lr: 0.000098 grad: 0.0808 (0.0770) loss: 0.8254 (0.8263) time: 0.1429 data: 0.0616 max mem: 9377 +Train: [34] [2100/6250] eta: 0:10:19 lr: 0.000098 grad: 0.0743 (0.0772) loss: 0.8218 (0.8262) time: 0.1411 data: 0.0620 max mem: 9377 +Train: [34] [2200/6250] eta: 0:10:03 lr: 0.000098 grad: 0.0722 (0.0771) loss: 0.8271 (0.8262) time: 0.1483 data: 0.0653 max mem: 9377 +Train: [34] [2300/6250] eta: 0:09:47 lr: 0.000098 grad: 0.0762 (0.0772) loss: 0.8185 (0.8262) time: 0.1409 data: 0.0595 max mem: 9377 +Train: [34] [2400/6250] eta: 0:09:32 lr: 0.000098 grad: 0.0788 (0.0773) loss: 0.8253 (0.8262) time: 0.1515 data: 0.0681 max mem: 9377 +Train: [34] [2500/6250] eta: 0:09:17 lr: 0.000098 grad: 0.0710 (0.0773) loss: 0.8304 (0.8262) time: 0.1393 data: 0.0498 max mem: 9377 +Train: [34] [2600/6250] eta: 0:09:01 lr: 0.000098 grad: 0.0785 (0.0773) loss: 0.8244 (0.8263) time: 0.1465 data: 0.0678 max mem: 9377 +Train: [34] [2700/6250] eta: 0:08:46 lr: 0.000098 grad: 0.0759 (0.0773) loss: 0.8233 (0.8263) time: 0.1270 data: 0.0435 max mem: 9377 +Train: [34] [2800/6250] eta: 0:08:31 lr: 0.000098 grad: 0.0778 (0.0773) loss: 0.8246 (0.8264) time: 0.1457 data: 0.0606 max mem: 9377 +Train: [34] [2900/6250] eta: 0:08:15 lr: 0.000098 grad: 0.0752 (0.0773) loss: 0.8300 (0.8264) time: 0.1192 data: 0.0369 max mem: 9377 +Train: [34] [3000/6250] eta: 0:07:59 lr: 0.000098 grad: 0.0721 (0.0773) loss: 0.8301 (0.8264) time: 0.1508 data: 0.0676 max mem: 9377 +Train: [34] [3100/6250] eta: 0:07:45 lr: 0.000098 grad: 0.0698 (0.0772) loss: 0.8275 (0.8264) time: 0.1513 data: 0.0688 max mem: 9377 +Train: [34] [3200/6250] eta: 0:07:30 lr: 0.000098 grad: 0.0765 (0.0772) loss: 0.8269 (0.8265) time: 0.1343 data: 0.0508 max mem: 9377 +Train: [34] [3300/6250] eta: 0:07:15 lr: 0.000097 grad: 0.0691 (0.0773) loss: 0.8312 (0.8266) time: 0.1357 data: 0.0514 max mem: 9377 +Train: [34] [3400/6250] eta: 0:07:00 lr: 0.000097 grad: 0.0735 (0.0773) loss: 0.8223 (0.8266) time: 0.1498 data: 0.0690 max mem: 9377 +Train: [34] [3500/6250] eta: 0:06:45 lr: 0.000097 grad: 0.0769 (0.0773) loss: 0.8227 (0.8266) time: 0.1446 data: 0.0640 max mem: 9377 +Train: [34] [3600/6250] eta: 0:06:30 lr: 0.000097 grad: 0.0803 (0.0774) loss: 0.8283 (0.8265) time: 0.1551 data: 0.0766 max mem: 9377 +Train: [34] [3700/6250] eta: 0:06:16 lr: 0.000097 grad: 0.0753 (0.0774) loss: 0.8268 (0.8266) time: 0.1579 data: 0.0761 max mem: 9377 +Train: [34] [3800/6250] eta: 0:06:01 lr: 0.000097 grad: 0.0810 (0.0775) loss: 0.8187 (0.8265) time: 0.1442 data: 0.0581 max mem: 9377 +Train: [34] [3900/6250] eta: 0:05:46 lr: 0.000097 grad: 0.0774 (0.0775) loss: 0.8268 (0.8265) time: 0.1678 data: 0.0899 max mem: 9377 +Train: [34] [4000/6250] eta: 0:05:31 lr: 0.000097 grad: 0.0709 (0.0776) loss: 0.8266 (0.8265) time: 0.1479 data: 0.0622 max mem: 9377 +Train: [34] [4100/6250] eta: 0:05:17 lr: 0.000097 grad: 0.0737 (0.0775) loss: 0.8261 (0.8265) time: 0.1647 data: 0.0840 max mem: 9377 +Train: [34] [4200/6250] eta: 0:05:02 lr: 0.000097 grad: 0.0736 (0.0775) loss: 0.8265 (0.8265) time: 0.1486 data: 0.0718 max mem: 9377 +Train: [34] [4300/6250] eta: 0:04:47 lr: 0.000097 grad: 0.0756 (0.0775) loss: 0.8237 (0.8265) time: 0.1443 data: 0.0621 max mem: 9377 +Train: [34] [4400/6250] eta: 0:04:32 lr: 0.000097 grad: 0.0792 (0.0775) loss: 0.8161 (0.8264) time: 0.1460 data: 0.0649 max mem: 9377 +Train: [34] [4500/6250] eta: 0:04:17 lr: 0.000097 grad: 0.0749 (0.0775) loss: 0.8304 (0.8264) time: 0.1528 data: 0.0707 max mem: 9377 +Train: [34] [4600/6250] eta: 0:04:03 lr: 0.000097 grad: 0.0720 (0.0775) loss: 0.8265 (0.8264) time: 0.1405 data: 0.0663 max mem: 9377 +Train: [34] [4700/6250] eta: 0:03:49 lr: 0.000097 grad: 0.0713 (0.0774) loss: 0.8305 (0.8264) time: 0.1915 data: 0.1025 max mem: 9377 +Train: [34] [4800/6250] eta: 0:03:34 lr: 0.000097 grad: 0.0783 (0.0775) loss: 0.8170 (0.8264) time: 0.1619 data: 0.0805 max mem: 9377 +Train: [34] [4900/6250] eta: 0:03:20 lr: 0.000097 grad: 0.0766 (0.0775) loss: 0.8199 (0.8263) time: 0.1741 data: 0.0928 max mem: 9377 +Train: [34] [5000/6250] eta: 0:03:05 lr: 0.000097 grad: 0.0755 (0.0775) loss: 0.8249 (0.8263) time: 0.1487 data: 0.0663 max mem: 9377 +Train: [34] [5100/6250] eta: 0:02:50 lr: 0.000097 grad: 0.0775 (0.0775) loss: 0.8213 (0.8262) time: 0.1290 data: 0.0439 max mem: 9377 +Train: [34] [5200/6250] eta: 0:02:35 lr: 0.000097 grad: 0.0760 (0.0775) loss: 0.8198 (0.8262) time: 0.1506 data: 0.0673 max mem: 9377 +Train: [34] [5300/6250] eta: 0:02:21 lr: 0.000097 grad: 0.0777 (0.0775) loss: 0.8216 (0.8262) time: 0.1459 data: 0.0653 max mem: 9377 +Train: [34] [5400/6250] eta: 0:02:06 lr: 0.000097 grad: 0.0732 (0.0775) loss: 0.8249 (0.8261) time: 0.1312 data: 0.0521 max mem: 9377 +Train: [34] [5500/6250] eta: 0:01:51 lr: 0.000097 grad: 0.0792 (0.0775) loss: 0.8250 (0.8261) time: 0.1438 data: 0.0528 max mem: 9377 +Train: [34] [5600/6250] eta: 0:01:36 lr: 0.000097 grad: 0.0749 (0.0776) loss: 0.8322 (0.8261) time: 0.1406 data: 0.0581 max mem: 9377 +Train: [34] [5700/6250] eta: 0:01:21 lr: 0.000097 grad: 0.0788 (0.0775) loss: 0.8220 (0.8261) time: 0.1286 data: 0.0414 max mem: 9377 +Train: [34] [5800/6250] eta: 0:01:06 lr: 0.000097 grad: 0.0732 (0.0776) loss: 0.8245 (0.8260) time: 0.1437 data: 0.0600 max mem: 9377 +Train: [34] [5900/6250] eta: 0:00:51 lr: 0.000097 grad: 0.0769 (0.0776) loss: 0.8220 (0.8260) time: 0.1542 data: 0.0752 max mem: 9377 +Train: [34] [6000/6250] eta: 0:00:36 lr: 0.000097 grad: 0.0730 (0.0776) loss: 0.8183 (0.8260) time: 0.1456 data: 0.0656 max mem: 9377 +Train: [34] [6100/6250] eta: 0:00:22 lr: 0.000097 grad: 0.0811 (0.0777) loss: 0.8265 (0.8259) time: 0.1323 data: 0.0512 max mem: 9377 +Train: [34] [6200/6250] eta: 0:00:07 lr: 0.000097 grad: 0.0756 (0.0777) loss: 0.8280 (0.8258) time: 0.1406 data: 0.0621 max mem: 9377 +Train: [34] [6249/6250] eta: 0:00:00 lr: 0.000097 grad: 0.0777 (0.0777) loss: 0.8288 (0.8258) time: 0.1459 data: 0.0495 max mem: 9377 +Train: [34] Total time: 0:15:29 (0.1487 s / it) +Averaged stats: lr: 0.000097 grad: 0.0777 (0.0777) loss: 0.8288 (0.8258) +Eval (hcp-train-subset): [34] [ 0/62] eta: 0:04:43 loss: 0.8355 (0.8355) time: 4.5733 data: 4.5410 max mem: 9377 +Eval (hcp-train-subset): [34] [61/62] eta: 0:00:00 loss: 0.8319 (0.8364) time: 0.1205 data: 0.0895 max mem: 9377 +Eval (hcp-train-subset): [34] Total time: 0:00:14 (0.2322 s / it) +Averaged stats (hcp-train-subset): loss: 0.8319 (0.8364) +Making plots (hcp-train-subset): example=22 +Eval (hcp-val): [34] [ 0/62] eta: 0:05:42 loss: 0.8375 (0.8375) time: 5.5194 data: 5.4886 max mem: 9377 +Eval (hcp-val): [34] [61/62] eta: 0:00:00 loss: 0.8378 (0.8391) time: 0.1221 data: 0.0967 max mem: 9377 +Eval (hcp-val): [34] Total time: 0:00:13 (0.2220 s / it) +Averaged stats (hcp-val): loss: 0.8378 (0.8391) +Making plots (hcp-val): example=22 +Eval (nsd-val): [34] [ 0/62] eta: 0:04:02 loss: 0.8060 (0.8060) time: 3.9178 data: 3.8361 max mem: 9377 +Eval (nsd-val): [34] [61/62] eta: 0:00:00 loss: 0.8141 (0.8154) time: 0.1132 data: 0.0877 max mem: 9377 +Eval (nsd-val): [34] Total time: 0:00:13 (0.2241 s / it) +Averaged stats (nsd-val): loss: 0.8141 (0.8154) +Making plots (nsd-val): example=5 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-00034.pth +Train: [35] [ 0/6250] eta: 9:54:55 lr: 0.000097 grad: 0.1302 (0.1302) loss: 0.8492 (0.8492) time: 5.7112 data: 5.5975 max mem: 9377 +Train: [35] [ 100/6250] eta: 0:21:00 lr: 0.000097 grad: 0.0643 (0.0784) loss: 0.8455 (0.8400) time: 0.1609 data: 0.0667 max mem: 9377 +Train: [35] [ 200/6250] eta: 0:17:52 lr: 0.000097 grad: 0.0737 (0.0806) loss: 0.8372 (0.8365) time: 0.1573 data: 0.0701 max mem: 9377 +Train: [35] [ 300/6250] eta: 0:16:48 lr: 0.000097 grad: 0.0674 (0.0805) loss: 0.8318 (0.8341) time: 0.1670 data: 0.0816 max mem: 9377 +Train: [35] [ 400/6250] eta: 0:15:57 lr: 0.000097 grad: 0.0743 (0.0802) loss: 0.8280 (0.8327) time: 0.1510 data: 0.0560 max mem: 9377 +Train: [35] [ 500/6250] eta: 0:15:17 lr: 0.000097 grad: 0.0700 (0.0797) loss: 0.8247 (0.8317) time: 0.1458 data: 0.0571 max mem: 9377 +Train: [35] [ 600/6250] eta: 0:14:41 lr: 0.000097 grad: 0.0703 (0.0796) loss: 0.8227 (0.8306) time: 0.1449 data: 0.0576 max mem: 9377 +Train: [35] [ 700/6250] eta: 0:14:08 lr: 0.000096 grad: 0.0731 (0.0794) loss: 0.8310 (0.8300) time: 0.1137 data: 0.0233 max mem: 9377 +Train: [35] [ 800/6250] eta: 0:13:48 lr: 0.000096 grad: 0.0732 (0.0796) loss: 0.8276 (0.8294) time: 0.1805 data: 0.0995 max mem: 9377 +Train: [35] [ 900/6250] eta: 0:13:32 lr: 0.000096 grad: 0.0792 (0.0800) loss: 0.8238 (0.8289) time: 0.1576 data: 0.0752 max mem: 9377 +Train: [35] [1000/6250] eta: 0:13:20 lr: 0.000096 grad: 0.0744 (0.0798) loss: 0.8200 (0.8286) time: 0.1828 data: 0.1022 max mem: 9377 +Train: [35] [1100/6250] eta: 0:13:10 lr: 0.000096 grad: 0.0756 (0.0799) loss: 0.8229 (0.8282) time: 0.1603 data: 0.0787 max mem: 9377 +Train: [35] [1200/6250] eta: 0:13:01 lr: 0.000096 grad: 0.0764 (0.0805) loss: 0.8224 (0.8278) time: 0.1509 data: 0.0710 max mem: 9377 +Train: [35] [1300/6250] eta: 0:12:45 lr: 0.000096 grad: 0.0788 (0.0804) loss: 0.8135 (0.8273) time: 0.1651 data: 0.0884 max mem: 9377 +Train: [35] [1400/6250] eta: 0:12:31 lr: 0.000096 grad: 0.0772 (0.0804) loss: 0.8246 (0.8268) time: 0.1668 data: 0.0891 max mem: 9377 +Train: [35] [1500/6250] eta: 0:12:16 lr: 0.000096 grad: 0.0806 (0.0807) loss: 0.8159 (0.8264) time: 0.1504 data: 0.0670 max mem: 9377 +Train: [35] [1600/6250] eta: 0:12:02 lr: 0.000096 grad: 0.0831 (0.0809) loss: 0.8202 (0.8260) time: 0.1310 data: 0.0469 max mem: 9377 +Train: [35] [1700/6250] eta: 0:11:45 lr: 0.000096 grad: 0.0795 (0.0813) loss: 0.8194 (0.8256) time: 0.1517 data: 0.0713 max mem: 9377 +Train: [35] [1800/6250] eta: 0:11:29 lr: 0.000096 grad: 0.0727 (0.0813) loss: 0.8268 (0.8253) time: 0.1506 data: 0.0696 max mem: 9377 +Train: [35] [1900/6250] eta: 0:11:11 lr: 0.000096 grad: 0.0798 (0.0814) loss: 0.8243 (0.8252) time: 0.1287 data: 0.0432 max mem: 9377 +Train: [35] [2000/6250] eta: 0:10:54 lr: 0.000096 grad: 0.0766 (0.0814) loss: 0.8218 (0.8249) time: 0.1567 data: 0.0780 max mem: 9377 +Train: [35] [2100/6250] eta: 0:10:35 lr: 0.000096 grad: 0.0822 (0.0812) loss: 0.8177 (0.8248) time: 0.1356 data: 0.0567 max mem: 9377 +Train: [35] [2200/6250] eta: 0:10:17 lr: 0.000096 grad: 0.0779 (0.0812) loss: 0.8236 (0.8248) time: 0.1331 data: 0.0502 max mem: 9377 +Train: [35] [2300/6250] eta: 0:10:00 lr: 0.000096 grad: 0.0739 (0.0812) loss: 0.8262 (0.8246) time: 0.1414 data: 0.0583 max mem: 9377 +Train: [35] [2400/6250] eta: 0:09:43 lr: 0.000096 grad: 0.0815 (0.0813) loss: 0.8203 (0.8244) time: 0.1543 data: 0.0754 max mem: 9377 +Train: [35] [2500/6250] eta: 0:09:26 lr: 0.000096 grad: 0.0816 (0.0814) loss: 0.8206 (0.8243) time: 0.1435 data: 0.0640 max mem: 9377 +Train: [35] [2600/6250] eta: 0:09:10 lr: 0.000096 grad: 0.0784 (0.0814) loss: 0.8276 (0.8243) time: 0.1397 data: 0.0571 max mem: 9377 +Train: [35] [2700/6250] eta: 0:08:55 lr: 0.000096 grad: 0.0821 (0.0814) loss: 0.8241 (0.8241) time: 0.2132 data: 0.0620 max mem: 9377 +Train: [35] [2800/6250] eta: 0:08:40 lr: 0.000096 grad: 0.0733 (0.0814) loss: 0.8273 (0.8242) time: 0.1652 data: 0.0817 max mem: 9377 +Train: [35] [2900/6250] eta: 0:08:24 lr: 0.000096 grad: 0.0764 (0.0814) loss: 0.8268 (0.8241) time: 0.1418 data: 0.0615 max mem: 9377 +Train: [35] [3000/6250] eta: 0:08:08 lr: 0.000096 grad: 0.0812 (0.0814) loss: 0.8198 (0.8241) time: 0.1245 data: 0.0352 max mem: 9377 +Train: [35] [3100/6250] eta: 0:07:53 lr: 0.000096 grad: 0.0750 (0.0814) loss: 0.8221 (0.8240) time: 0.1260 data: 0.0409 max mem: 9377 +Train: [35] [3200/6250] eta: 0:07:37 lr: 0.000096 grad: 0.0748 (0.0814) loss: 0.8217 (0.8239) time: 0.1519 data: 0.0662 max mem: 9377 +Train: [35] [3300/6250] eta: 0:07:23 lr: 0.000096 grad: 0.0852 (0.0815) loss: 0.8200 (0.8238) time: 0.1573 data: 0.0756 max mem: 9377 +Train: [35] [3400/6250] eta: 0:07:06 lr: 0.000096 grad: 0.0779 (0.0815) loss: 0.8220 (0.8238) time: 0.1238 data: 0.0372 max mem: 9377 +Train: [35] [3500/6250] eta: 0:06:51 lr: 0.000096 grad: 0.0769 (0.0815) loss: 0.8278 (0.8237) time: 0.1579 data: 0.0707 max mem: 9377 +Train: [35] [3600/6250] eta: 0:06:35 lr: 0.000096 grad: 0.0767 (0.0815) loss: 0.8229 (0.8237) time: 0.1471 data: 0.0606 max mem: 9377 +Train: [35] [3700/6250] eta: 0:06:20 lr: 0.000096 grad: 0.0809 (0.0815) loss: 0.8264 (0.8237) time: 0.1468 data: 0.0650 max mem: 9377 +Train: [35] [3800/6250] eta: 0:06:06 lr: 0.000096 grad: 0.0739 (0.0815) loss: 0.8247 (0.8237) time: 0.1753 data: 0.0927 max mem: 9377 +Train: [35] [3900/6250] eta: 0:05:50 lr: 0.000096 grad: 0.0794 (0.0815) loss: 0.8182 (0.8236) time: 0.1397 data: 0.0490 max mem: 9377 +Train: [35] [4000/6250] eta: 0:05:35 lr: 0.000096 grad: 0.0766 (0.0815) loss: 0.8207 (0.8236) time: 0.1500 data: 0.0637 max mem: 9377 +Train: [35] [4100/6250] eta: 0:05:20 lr: 0.000096 grad: 0.0764 (0.0814) loss: 0.8210 (0.8236) time: 0.1510 data: 0.0683 max mem: 9377 +Train: [35] [4200/6250] eta: 0:05:05 lr: 0.000096 grad: 0.0812 (0.0814) loss: 0.8263 (0.8235) time: 0.1450 data: 0.0634 max mem: 9377 +Train: [35] [4300/6250] eta: 0:04:51 lr: 0.000095 grad: 0.0818 (0.0815) loss: 0.8285 (0.8235) time: 0.1736 data: 0.0912 max mem: 9377 +Train: [35] [4400/6250] eta: 0:04:37 lr: 0.000095 grad: 0.0779 (0.0815) loss: 0.8204 (0.8234) time: 0.1631 data: 0.0737 max mem: 9377 +Train: [35] [4500/6250] eta: 0:04:23 lr: 0.000095 grad: 0.0816 (0.0816) loss: 0.8203 (0.8232) time: 0.1820 data: 0.0952 max mem: 9377 +Train: [35] [4600/6250] eta: 0:04:09 lr: 0.000095 grad: 0.0743 (0.0816) loss: 0.8235 (0.8231) time: 0.2249 data: 0.1407 max mem: 9377 +Train: [35] [4700/6250] eta: 0:03:54 lr: 0.000095 grad: 0.0814 (0.0816) loss: 0.8230 (0.8231) time: 0.1797 data: 0.0967 max mem: 9377 +Train: [35] [4800/6250] eta: 0:03:40 lr: 0.000095 grad: 0.0747 (0.0816) loss: 0.8268 (0.8230) time: 0.1800 data: 0.0972 max mem: 9377 +Train: [35] [4900/6250] eta: 0:03:25 lr: 0.000095 grad: 0.0732 (0.0816) loss: 0.8240 (0.8230) time: 0.1471 data: 0.0630 max mem: 9377 +Train: [35] [5000/6250] eta: 0:03:09 lr: 0.000095 grad: 0.0781 (0.0816) loss: 0.8219 (0.8230) time: 0.1314 data: 0.0470 max mem: 9377 +Train: [35] [5100/6250] eta: 0:02:54 lr: 0.000095 grad: 0.0736 (0.0817) loss: 0.8258 (0.8230) time: 0.1600 data: 0.0775 max mem: 9377 +Train: [35] [5200/6250] eta: 0:02:39 lr: 0.000095 grad: 0.0795 (0.0817) loss: 0.8256 (0.8230) time: 0.1581 data: 0.0785 max mem: 9377 +Train: [35] [5300/6250] eta: 0:02:24 lr: 0.000095 grad: 0.0770 (0.0816) loss: 0.8239 (0.8230) time: 0.1441 data: 0.0592 max mem: 9377 +Train: [35] [5400/6250] eta: 0:02:08 lr: 0.000095 grad: 0.0742 (0.0817) loss: 0.8316 (0.8230) time: 0.1400 data: 0.0594 max mem: 9377 +Train: [35] [5500/6250] eta: 0:01:53 lr: 0.000095 grad: 0.0754 (0.0816) loss: 0.8252 (0.8230) time: 0.1404 data: 0.0615 max mem: 9377 +Train: [35] [5600/6250] eta: 0:01:38 lr: 0.000095 grad: 0.0787 (0.0816) loss: 0.8203 (0.8230) time: 0.1448 data: 0.0618 max mem: 9377 +Train: [35] [5700/6250] eta: 0:01:23 lr: 0.000095 grad: 0.0762 (0.0816) loss: 0.8266 (0.8231) time: 0.1371 data: 0.0548 max mem: 9377 +Train: [35] [5800/6250] eta: 0:01:08 lr: 0.000095 grad: 0.0812 (0.0816) loss: 0.8204 (0.8231) time: 0.1861 data: 0.1023 max mem: 9377 +Train: [35] [5900/6250] eta: 0:00:53 lr: 0.000095 grad: 0.0776 (0.0816) loss: 0.8261 (0.8232) time: 0.1277 data: 0.0487 max mem: 9377 +Train: [35] [6000/6250] eta: 0:00:37 lr: 0.000095 grad: 0.0754 (0.0815) loss: 0.8301 (0.8233) time: 0.1513 data: 0.0696 max mem: 9377 +Train: [35] [6100/6250] eta: 0:00:22 lr: 0.000095 grad: 0.0784 (0.0815) loss: 0.8317 (0.8233) time: 0.1514 data: 0.0678 max mem: 9377 +Train: [35] [6200/6250] eta: 0:00:07 lr: 0.000095 grad: 0.0787 (0.0815) loss: 0.8243 (0.8234) time: 0.1252 data: 0.0379 max mem: 9377 +Train: [35] [6249/6250] eta: 0:00:00 lr: 0.000095 grad: 0.0785 (0.0815) loss: 0.8270 (0.8234) time: 0.1102 data: 0.0315 max mem: 9377 +Train: [35] Total time: 0:15:54 (0.1528 s / it) +Averaged stats: lr: 0.000095 grad: 0.0785 (0.0815) loss: 0.8270 (0.8234) +Eval (hcp-train-subset): [35] [ 0/62] eta: 0:05:01 loss: 0.8353 (0.8353) time: 4.8631 data: 4.8183 max mem: 9377 +Eval (hcp-train-subset): [35] [61/62] eta: 0:00:00 loss: 0.8352 (0.8359) time: 0.1187 data: 0.0938 max mem: 9377 +Eval (hcp-train-subset): [35] Total time: 0:00:13 (0.2120 s / it) +Averaged stats (hcp-train-subset): loss: 0.8352 (0.8359) +Eval (hcp-val): [35] [ 0/62] eta: 0:04:58 loss: 0.8354 (0.8354) time: 4.8173 data: 4.7874 max mem: 9377 +Eval (hcp-val): [35] [61/62] eta: 0:00:00 loss: 0.8356 (0.8380) time: 0.1080 data: 0.0830 max mem: 9377 +Eval (hcp-val): [35] Total time: 0:00:12 (0.2078 s / it) +Averaged stats (hcp-val): loss: 0.8356 (0.8380) +Eval (nsd-val): [35] [ 0/62] eta: 0:04:02 loss: 0.8026 (0.8026) time: 3.9034 data: 3.8326 max mem: 9377 +Eval (nsd-val): [35] [61/62] eta: 0:00:00 loss: 0.8095 (0.8121) time: 0.1256 data: 0.0986 max mem: 9377 +Eval (nsd-val): [35] Total time: 0:00:12 (0.2069 s / it) +Averaged stats (nsd-val): loss: 0.8095 (0.8121) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +Train: [36] [ 0/6250] eta: 7:11:47 lr: 0.000095 grad: 0.0613 (0.0613) loss: 0.8728 (0.8728) time: 4.1452 data: 3.8527 max mem: 9377 +Train: [36] [ 100/6250] eta: 0:20:40 lr: 0.000095 grad: 0.0826 (0.0994) loss: 0.8275 (0.8369) time: 0.1541 data: 0.0576 max mem: 9377 +Train: [36] [ 200/6250] eta: 0:17:50 lr: 0.000095 grad: 0.0755 (0.0943) loss: 0.8276 (0.8308) time: 0.1423 data: 0.0370 max mem: 9377 +Train: [36] [ 300/6250] eta: 0:16:24 lr: 0.000095 grad: 0.0798 (0.0930) loss: 0.8193 (0.8269) time: 0.1290 data: 0.0237 max mem: 9377 +Train: [36] [ 400/6250] eta: 0:15:40 lr: 0.000095 grad: 0.0692 (0.0909) loss: 0.8379 (0.8258) time: 0.1398 data: 0.0470 max mem: 9377 +Train: [36] [ 500/6250] eta: 0:14:58 lr: 0.000095 grad: 0.0758 (0.0887) loss: 0.8253 (0.8256) time: 0.1305 data: 0.0405 max mem: 9377 +Train: [36] [ 600/6250] eta: 0:14:32 lr: 0.000095 grad: 0.0721 (0.0866) loss: 0.8291 (0.8259) time: 0.1439 data: 0.0576 max mem: 9377 +Train: [36] [ 700/6250] eta: 0:14:16 lr: 0.000095 grad: 0.0735 (0.0855) loss: 0.8212 (0.8262) time: 0.1497 data: 0.0673 max mem: 9377 +Train: [36] [ 800/6250] eta: 0:13:55 lr: 0.000095 grad: 0.0719 (0.0841) loss: 0.8333 (0.8267) time: 0.1469 data: 0.0639 max mem: 9377 +Train: [36] [ 900/6250] eta: 0:13:36 lr: 0.000095 grad: 0.0704 (0.0830) loss: 0.8279 (0.8267) time: 0.1641 data: 0.0813 max mem: 9377 +Train: [36] [1000/6250] eta: 0:13:26 lr: 0.000095 grad: 0.0711 (0.0822) loss: 0.8332 (0.8270) time: 0.1629 data: 0.0846 max mem: 9377 +Train: [36] [1100/6250] eta: 0:13:15 lr: 0.000095 grad: 0.0749 (0.0819) loss: 0.8221 (0.8268) time: 0.1797 data: 0.1014 max mem: 9377 +Train: [36] [1200/6250] eta: 0:13:01 lr: 0.000095 grad: 0.0671 (0.0812) loss: 0.8336 (0.8268) time: 0.1876 data: 0.1109 max mem: 9377 +Train: [36] [1300/6250] eta: 0:12:47 lr: 0.000095 grad: 0.0751 (0.0808) loss: 0.8247 (0.8269) time: 0.1678 data: 0.0930 max mem: 9377 +Train: [36] [1400/6250] eta: 0:12:32 lr: 0.000095 grad: 0.0755 (0.0805) loss: 0.8288 (0.8269) time: 0.1591 data: 0.0823 max mem: 9377 +Train: [36] [1500/6250] eta: 0:12:19 lr: 0.000095 grad: 0.0751 (0.0804) loss: 0.8225 (0.8268) time: 0.1645 data: 0.0825 max mem: 9377 +Train: [36] [1600/6250] eta: 0:12:05 lr: 0.000094 grad: 0.0746 (0.0801) loss: 0.8274 (0.8268) time: 0.1592 data: 0.0763 max mem: 9377 +Train: [36] [1700/6250] eta: 0:11:51 lr: 0.000094 grad: 0.0775 (0.0800) loss: 0.8244 (0.8267) time: 0.1580 data: 0.0769 max mem: 9377 +Train: [36] [1800/6250] eta: 0:11:37 lr: 0.000094 grad: 0.0766 (0.0799) loss: 0.8275 (0.8268) time: 0.1568 data: 0.0794 max mem: 9377 +Train: [36] [1900/6250] eta: 0:11:22 lr: 0.000094 grad: 0.0766 (0.0798) loss: 0.8225 (0.8268) time: 0.1413 data: 0.0536 max mem: 9377 +Train: [36] [2000/6250] eta: 0:11:07 lr: 0.000094 grad: 0.0752 (0.0798) loss: 0.8281 (0.8268) time: 0.1486 data: 0.0612 max mem: 9377 +Train: [36] [2100/6250] eta: 0:10:51 lr: 0.000094 grad: 0.0781 (0.0798) loss: 0.8266 (0.8267) time: 0.1653 data: 0.0863 max mem: 9377 +Train: [36] [2200/6250] eta: 0:10:33 lr: 0.000094 grad: 0.0716 (0.0798) loss: 0.8293 (0.8267) time: 0.1485 data: 0.0665 max mem: 9377 +Train: [36] [2300/6250] eta: 0:10:17 lr: 0.000094 grad: 0.0773 (0.0797) loss: 0.8310 (0.8267) time: 0.1574 data: 0.0746 max mem: 9377 +Train: [36] [2400/6250] eta: 0:10:00 lr: 0.000094 grad: 0.0722 (0.0796) loss: 0.8300 (0.8266) time: 0.1448 data: 0.0616 max mem: 9377 +Train: [36] [2500/6250] eta: 0:09:41 lr: 0.000094 grad: 0.0752 (0.0796) loss: 0.8235 (0.8266) time: 0.1346 data: 0.0486 max mem: 9377 +Train: [36] [2600/6250] eta: 0:09:24 lr: 0.000094 grad: 0.0800 (0.0795) loss: 0.8252 (0.8265) time: 0.1312 data: 0.0489 max mem: 9377 +Train: [36] [2700/6250] eta: 0:09:07 lr: 0.000094 grad: 0.0757 (0.0795) loss: 0.8322 (0.8266) time: 0.1499 data: 0.0672 max mem: 9377 +Train: [36] [2800/6250] eta: 0:08:50 lr: 0.000094 grad: 0.0771 (0.0795) loss: 0.8261 (0.8266) time: 0.1318 data: 0.0525 max mem: 9377 +Train: [36] [2900/6250] eta: 0:08:33 lr: 0.000094 grad: 0.0816 (0.0795) loss: 0.8248 (0.8266) time: 0.1365 data: 0.0585 max mem: 9377 +Train: [36] [3000/6250] eta: 0:08:17 lr: 0.000094 grad: 0.0774 (0.0794) loss: 0.8197 (0.8266) time: 0.1439 data: 0.0615 max mem: 9377 +Train: [36] [3100/6250] eta: 0:07:59 lr: 0.000094 grad: 0.0792 (0.0794) loss: 0.8287 (0.8266) time: 0.1352 data: 0.0501 max mem: 9377 +Train: [36] [3200/6250] eta: 0:07:43 lr: 0.000094 grad: 0.0714 (0.0793) loss: 0.8239 (0.8266) time: 0.1235 data: 0.0401 max mem: 9377 +Train: [36] [3300/6250] eta: 0:07:27 lr: 0.000094 grad: 0.0797 (0.0794) loss: 0.8221 (0.8265) time: 0.1317 data: 0.0564 max mem: 9377 +Train: [36] [3400/6250] eta: 0:07:11 lr: 0.000094 grad: 0.0728 (0.0796) loss: 0.8309 (0.8265) time: 0.1404 data: 0.0557 max mem: 9377 +Train: [36] [3500/6250] eta: 0:06:55 lr: 0.000094 grad: 0.0768 (0.0796) loss: 0.8251 (0.8265) time: 0.1252 data: 0.0411 max mem: 9377 +Train: [36] [3600/6250] eta: 0:06:40 lr: 0.000094 grad: 0.0762 (0.0796) loss: 0.8237 (0.8265) time: 0.1364 data: 0.0501 max mem: 9377 +Train: [36] [3700/6250] eta: 0:06:24 lr: 0.000094 grad: 0.0777 (0.0796) loss: 0.8271 (0.8265) time: 0.1404 data: 0.0534 max mem: 9377 +Train: [36] [3800/6250] eta: 0:06:09 lr: 0.000094 grad: 0.0797 (0.0796) loss: 0.8213 (0.8264) time: 0.1432 data: 0.0669 max mem: 9377 +Train: [36] [3900/6250] eta: 0:05:53 lr: 0.000094 grad: 0.0776 (0.0796) loss: 0.8240 (0.8264) time: 0.1526 data: 0.0721 max mem: 9377 +Train: [36] [4000/6250] eta: 0:05:38 lr: 0.000094 grad: 0.0770 (0.0796) loss: 0.8184 (0.8263) time: 0.1544 data: 0.0728 max mem: 9377 +Train: [36] [4100/6250] eta: 0:05:24 lr: 0.000094 grad: 0.0782 (0.0796) loss: 0.8260 (0.8263) time: 0.1571 data: 0.0795 max mem: 9377 +Train: [36] [4200/6250] eta: 0:05:09 lr: 0.000094 grad: 0.0770 (0.0796) loss: 0.8209 (0.8263) time: 0.1377 data: 0.0615 max mem: 9377 +Train: [36] [4300/6250] eta: 0:04:54 lr: 0.000094 grad: 0.0720 (0.0795) loss: 0.8321 (0.8263) time: 0.1418 data: 0.0569 max mem: 9377 +Train: [36] [4400/6250] eta: 0:04:39 lr: 0.000094 grad: 0.0724 (0.0795) loss: 0.8326 (0.8262) time: 0.1804 data: 0.1021 max mem: 9377 +Train: [36] [4500/6250] eta: 0:04:23 lr: 0.000094 grad: 0.0793 (0.0795) loss: 0.8275 (0.8262) time: 0.1187 data: 0.0383 max mem: 9377 +Train: [36] [4600/6250] eta: 0:04:08 lr: 0.000094 grad: 0.0747 (0.0795) loss: 0.8246 (0.8261) time: 0.1220 data: 0.0404 max mem: 9377 +Train: [36] [4700/6250] eta: 0:03:52 lr: 0.000094 grad: 0.0723 (0.0794) loss: 0.8242 (0.8261) time: 0.1321 data: 0.0378 max mem: 9377 +Train: [36] [4800/6250] eta: 0:03:37 lr: 0.000094 grad: 0.0758 (0.0794) loss: 0.8319 (0.8260) time: 0.1613 data: 0.0774 max mem: 9377 +Train: [36] [4900/6250] eta: 0:03:22 lr: 0.000094 grad: 0.0764 (0.0794) loss: 0.8230 (0.8260) time: 0.1232 data: 0.0358 max mem: 9377 +Train: [36] [5000/6250] eta: 0:03:07 lr: 0.000094 grad: 0.0761 (0.0793) loss: 0.8282 (0.8260) time: 0.1412 data: 0.0595 max mem: 9377 +Train: [36] [5100/6250] eta: 0:02:51 lr: 0.000093 grad: 0.0749 (0.0793) loss: 0.8240 (0.8259) time: 0.1375 data: 0.0468 max mem: 9377 +Train: [36] [5200/6250] eta: 0:02:36 lr: 0.000093 grad: 0.0771 (0.0792) loss: 0.8219 (0.8259) time: 0.1502 data: 0.0721 max mem: 9377 +Train: [36] [5300/6250] eta: 0:02:21 lr: 0.000093 grad: 0.0750 (0.0792) loss: 0.8235 (0.8259) time: 0.1525 data: 0.0722 max mem: 9377 +Train: [36] [5400/6250] eta: 0:02:06 lr: 0.000093 grad: 0.0795 (0.0792) loss: 0.8314 (0.8259) time: 0.1422 data: 0.0588 max mem: 9377 +Train: [36] [5500/6250] eta: 0:01:51 lr: 0.000093 grad: 0.0708 (0.0791) loss: 0.8262 (0.8260) time: 0.1831 data: 0.0992 max mem: 9377 +Train: [36] [5600/6250] eta: 0:01:36 lr: 0.000093 grad: 0.0742 (0.0790) loss: 0.8287 (0.8260) time: 0.1522 data: 0.0718 max mem: 9377 +Train: [36] [5700/6250] eta: 0:01:22 lr: 0.000093 grad: 0.0723 (0.0790) loss: 0.8327 (0.8260) time: 0.1784 data: 0.0957 max mem: 9377 +Train: [36] [5800/6250] eta: 0:01:07 lr: 0.000093 grad: 0.0734 (0.0790) loss: 0.8293 (0.8261) time: 0.1558 data: 0.0788 max mem: 9377 +Train: [36] [5900/6250] eta: 0:00:52 lr: 0.000093 grad: 0.0781 (0.0790) loss: 0.8262 (0.8261) time: 0.1644 data: 0.0785 max mem: 9377 +Train: [36] [6000/6250] eta: 0:00:37 lr: 0.000093 grad: 0.0725 (0.0790) loss: 0.8316 (0.8261) time: 0.1550 data: 0.0792 max mem: 9377 +Train: [36] [6100/6250] eta: 0:00:22 lr: 0.000093 grad: 0.0729 (0.0790) loss: 0.8350 (0.8261) time: 0.1497 data: 0.0655 max mem: 9377 +Train: [36] [6200/6250] eta: 0:00:07 lr: 0.000093 grad: 0.0747 (0.0790) loss: 0.8271 (0.8262) time: 0.1519 data: 0.0690 max mem: 9377 +Train: [36] [6249/6250] eta: 0:00:00 lr: 0.000093 grad: 0.0729 (0.0790) loss: 0.8252 (0.8262) time: 0.1314 data: 0.0415 max mem: 9377 +Train: [36] Total time: 0:15:39 (0.1503 s / it) +Averaged stats: lr: 0.000093 grad: 0.0729 (0.0790) loss: 0.8252 (0.8262) +Eval (hcp-train-subset): [36] [ 0/62] eta: 0:03:20 loss: 0.8389 (0.8389) time: 3.2416 data: 3.1655 max mem: 9377 +Eval (hcp-train-subset): [36] [61/62] eta: 0:00:00 loss: 0.8357 (0.8360) time: 0.1334 data: 0.1069 max mem: 9377 +Eval (hcp-train-subset): [36] Total time: 0:00:13 (0.2203 s / it) +Averaged stats (hcp-train-subset): loss: 0.8357 (0.8360) +Eval (hcp-val): [36] [ 0/62] eta: 0:05:00 loss: 0.8328 (0.8328) time: 4.8513 data: 4.8217 max mem: 9377 +Eval (hcp-val): [36] [61/62] eta: 0:00:00 loss: 0.8373 (0.8381) time: 0.1212 data: 0.0963 max mem: 9377 +Eval (hcp-val): [36] Total time: 0:00:13 (0.2127 s / it) +Averaged stats (hcp-val): loss: 0.8373 (0.8381) +Eval (nsd-val): [36] [ 0/62] eta: 0:04:54 loss: 0.7993 (0.7993) time: 4.7521 data: 4.7221 max mem: 9377 +Eval (nsd-val): [36] [61/62] eta: 0:00:00 loss: 0.8111 (0.8116) time: 0.1172 data: 0.0919 max mem: 9377 +Eval (nsd-val): [36] Total time: 0:00:13 (0.2134 s / it) +Averaged stats (nsd-val): loss: 0.8111 (0.8116) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +Train: [37] [ 0/6250] eta: 8:48:07 lr: 0.000093 grad: 0.0570 (0.0570) loss: 0.8879 (0.8879) time: 5.0700 data: 4.8049 max mem: 9377 +Train: [37] [ 100/6250] eta: 0:21:47 lr: 0.000093 grad: 0.0813 (0.0963) loss: 0.8197 (0.8233) time: 0.1811 data: 0.0973 max mem: 9377 +Train: [37] [ 200/6250] eta: 0:18:32 lr: 0.000093 grad: 0.0788 (0.0910) loss: 0.8228 (0.8236) time: 0.1727 data: 0.0872 max mem: 9377 +Train: [37] [ 300/6250] eta: 0:17:09 lr: 0.000093 grad: 0.0751 (0.0883) loss: 0.8244 (0.8245) time: 0.1636 data: 0.0735 max mem: 9377 +Train: [37] [ 400/6250] eta: 0:16:06 lr: 0.000093 grad: 0.0756 (0.0871) loss: 0.8191 (0.8237) time: 0.1328 data: 0.0405 max mem: 9377 +Train: [37] [ 500/6250] eta: 0:15:23 lr: 0.000093 grad: 0.0764 (0.0862) loss: 0.8235 (0.8236) time: 0.1386 data: 0.0479 max mem: 9377 +Train: [37] [ 600/6250] eta: 0:14:56 lr: 0.000093 grad: 0.0824 (0.0850) loss: 0.8224 (0.8238) time: 0.1675 data: 0.0856 max mem: 9377 +Train: [37] [ 700/6250] eta: 0:14:30 lr: 0.000093 grad: 0.0796 (0.0838) loss: 0.8172 (0.8240) time: 0.1531 data: 0.0693 max mem: 9377 +Train: [37] [ 800/6250] eta: 0:14:07 lr: 0.000093 grad: 0.0735 (0.0829) loss: 0.8219 (0.8242) time: 0.1377 data: 0.0538 max mem: 9377 +Train: [37] [ 900/6250] eta: 0:13:52 lr: 0.000093 grad: 0.0725 (0.0820) loss: 0.8251 (0.8245) time: 0.1582 data: 0.0791 max mem: 9377 +Train: [37] [1000/6250] eta: 0:13:35 lr: 0.000093 grad: 0.0755 (0.0813) loss: 0.8258 (0.8248) time: 0.1402 data: 0.0534 max mem: 9377 +Train: [37] [1100/6250] eta: 0:13:15 lr: 0.000093 grad: 0.0797 (0.0811) loss: 0.8277 (0.8249) time: 0.1281 data: 0.0374 max mem: 9377 +Train: [37] [1200/6250] eta: 0:12:55 lr: 0.000093 grad: 0.0747 (0.0807) loss: 0.8249 (0.8249) time: 0.1373 data: 0.0518 max mem: 9377 +Train: [37] [1300/6250] eta: 0:12:32 lr: 0.000093 grad: 0.0758 (0.0805) loss: 0.8286 (0.8249) time: 0.1363 data: 0.0586 max mem: 9377 +Train: [37] [1400/6250] eta: 0:12:13 lr: 0.000093 grad: 0.0715 (0.0802) loss: 0.8206 (0.8248) time: 0.1377 data: 0.0542 max mem: 9377 +Train: [37] [1500/6250] eta: 0:11:55 lr: 0.000093 grad: 0.0717 (0.0802) loss: 0.8237 (0.8247) time: 0.1435 data: 0.0641 max mem: 9377 +Train: [37] [1600/6250] eta: 0:11:36 lr: 0.000093 grad: 0.0755 (0.0803) loss: 0.8321 (0.8247) time: 0.1349 data: 0.0540 max mem: 9377 +Train: [37] [1700/6250] eta: 0:11:20 lr: 0.000093 grad: 0.0747 (0.0802) loss: 0.8261 (0.8247) time: 0.1432 data: 0.0624 max mem: 9377 +Train: [37] [1800/6250] eta: 0:11:04 lr: 0.000093 grad: 0.0760 (0.0800) loss: 0.8262 (0.8248) time: 0.1276 data: 0.0466 max mem: 9377 +Train: [37] [1900/6250] eta: 0:10:50 lr: 0.000093 grad: 0.0788 (0.0799) loss: 0.8257 (0.8248) time: 0.1583 data: 0.0673 max mem: 9377 +Train: [37] [2000/6250] eta: 0:10:34 lr: 0.000093 grad: 0.0758 (0.0797) loss: 0.8317 (0.8249) time: 0.1458 data: 0.0658 max mem: 9377 +Train: [37] [2100/6250] eta: 0:10:20 lr: 0.000093 grad: 0.0788 (0.0796) loss: 0.8268 (0.8250) time: 0.1387 data: 0.0576 max mem: 9377 +Train: [37] [2200/6250] eta: 0:10:04 lr: 0.000093 grad: 0.0767 (0.0795) loss: 0.8244 (0.8250) time: 0.1507 data: 0.0724 max mem: 9377 +Train: [37] [2300/6250] eta: 0:09:52 lr: 0.000092 grad: 0.0765 (0.0795) loss: 0.8244 (0.8251) time: 0.1247 data: 0.0371 max mem: 9377 +Train: [37] [2400/6250] eta: 0:09:40 lr: 0.000092 grad: 0.0728 (0.0795) loss: 0.8211 (0.8251) time: 0.1530 data: 0.0711 max mem: 9377 +Train: [37] [2500/6250] eta: 0:09:26 lr: 0.000092 grad: 0.0768 (0.0795) loss: 0.8291 (0.8252) time: 0.1575 data: 0.0745 max mem: 9377 +Train: [37] [2600/6250] eta: 0:09:11 lr: 0.000092 grad: 0.0779 (0.0796) loss: 0.8275 (0.8251) time: 0.1586 data: 0.0777 max mem: 9377 +Train: [37] [2700/6250] eta: 0:08:56 lr: 0.000092 grad: 0.0794 (0.0796) loss: 0.8233 (0.8251) time: 0.1654 data: 0.0774 max mem: 9377 +Train: [37] [2800/6250] eta: 0:08:42 lr: 0.000092 grad: 0.0759 (0.0796) loss: 0.8254 (0.8251) time: 0.1569 data: 0.0725 max mem: 9377 +Train: [37] [2900/6250] eta: 0:08:28 lr: 0.000092 grad: 0.0857 (0.0798) loss: 0.8184 (0.8251) time: 0.1659 data: 0.0842 max mem: 9377 +Train: [37] [3000/6250] eta: 0:08:13 lr: 0.000092 grad: 0.0754 (0.0798) loss: 0.8264 (0.8251) time: 0.1604 data: 0.0780 max mem: 9377 +Train: [37] [3100/6250] eta: 0:07:58 lr: 0.000092 grad: 0.0759 (0.0799) loss: 0.8210 (0.8251) time: 0.1617 data: 0.0823 max mem: 9377 +Train: [37] [3200/6250] eta: 0:07:44 lr: 0.000092 grad: 0.0769 (0.0799) loss: 0.8284 (0.8251) time: 0.1848 data: 0.1052 max mem: 9377 +Train: [37] [3300/6250] eta: 0:07:29 lr: 0.000092 grad: 0.0799 (0.0800) loss: 0.8198 (0.8250) time: 0.1523 data: 0.0710 max mem: 9377 +Train: [37] [3400/6250] eta: 0:07:13 lr: 0.000092 grad: 0.0775 (0.0800) loss: 0.8278 (0.8251) time: 0.1423 data: 0.0557 max mem: 9377 +Train: [37] [3500/6250] eta: 0:06:57 lr: 0.000092 grad: 0.0727 (0.0800) loss: 0.8287 (0.8250) time: 0.1465 data: 0.0662 max mem: 9377 +Train: [37] [3600/6250] eta: 0:06:43 lr: 0.000092 grad: 0.0748 (0.0800) loss: 0.8302 (0.8250) time: 0.1964 data: 0.1156 max mem: 9377 +Train: [37] [3700/6250] eta: 0:06:28 lr: 0.000092 grad: 0.0821 (0.0801) loss: 0.8277 (0.8250) time: 0.1571 data: 0.0755 max mem: 9377 +Train: [37] [3800/6250] eta: 0:06:13 lr: 0.000092 grad: 0.0754 (0.0800) loss: 0.8250 (0.8250) time: 0.1873 data: 0.1025 max mem: 9377 +Train: [37] [3900/6250] eta: 0:05:59 lr: 0.000092 grad: 0.0758 (0.0800) loss: 0.8215 (0.8250) time: 0.1491 data: 0.0572 max mem: 9377 +Train: [37] [4000/6250] eta: 0:05:43 lr: 0.000092 grad: 0.0784 (0.0800) loss: 0.8238 (0.8250) time: 0.1602 data: 0.0746 max mem: 9377 +Train: [37] [4100/6250] eta: 0:05:28 lr: 0.000092 grad: 0.0806 (0.0800) loss: 0.8241 (0.8250) time: 0.1591 data: 0.0781 max mem: 9377 +Train: [37] [4200/6250] eta: 0:05:12 lr: 0.000092 grad: 0.0737 (0.0800) loss: 0.8261 (0.8250) time: 0.1418 data: 0.0609 max mem: 9377 +Train: [37] [4300/6250] eta: 0:04:56 lr: 0.000092 grad: 0.0796 (0.0801) loss: 0.8218 (0.8250) time: 0.1261 data: 0.0442 max mem: 9377 +Train: [37] [4400/6250] eta: 0:04:41 lr: 0.000092 grad: 0.0793 (0.0801) loss: 0.8279 (0.8250) time: 0.1443 data: 0.0580 max mem: 9377 +Train: [37] [4500/6250] eta: 0:04:25 lr: 0.000092 grad: 0.0804 (0.0800) loss: 0.8271 (0.8250) time: 0.1275 data: 0.0447 max mem: 9377 +Train: [37] [4600/6250] eta: 0:04:10 lr: 0.000092 grad: 0.0765 (0.0800) loss: 0.8277 (0.8250) time: 0.1342 data: 0.0509 max mem: 9377 +Train: [37] [4700/6250] eta: 0:03:54 lr: 0.000092 grad: 0.0779 (0.0801) loss: 0.8231 (0.8250) time: 0.1436 data: 0.0581 max mem: 9377 +Train: [37] [4800/6250] eta: 0:03:39 lr: 0.000092 grad: 0.0789 (0.0801) loss: 0.8294 (0.8250) time: 0.1619 data: 0.0858 max mem: 9377 +Train: [37] [4900/6250] eta: 0:03:24 lr: 0.000092 grad: 0.0738 (0.0801) loss: 0.8297 (0.8251) time: 0.1304 data: 0.0484 max mem: 9377 +Train: [37] [5000/6250] eta: 0:03:09 lr: 0.000092 grad: 0.0780 (0.0801) loss: 0.8313 (0.8251) time: 0.1945 data: 0.1153 max mem: 9377 +Train: [37] [5100/6250] eta: 0:02:54 lr: 0.000092 grad: 0.0802 (0.0803) loss: 0.8197 (0.8251) time: 0.1360 data: 0.0531 max mem: 9377 +Train: [37] [5200/6250] eta: 0:02:39 lr: 0.000092 grad: 0.0810 (0.0804) loss: 0.8253 (0.8251) time: 0.1524 data: 0.0768 max mem: 9377 +Train: [37] [5300/6250] eta: 0:02:24 lr: 0.000092 grad: 0.0786 (0.0803) loss: 0.8276 (0.8251) time: 0.1669 data: 0.0820 max mem: 9377 +Train: [37] [5400/6250] eta: 0:02:10 lr: 0.000092 grad: 0.0731 (0.0803) loss: 0.8294 (0.8252) time: 0.1603 data: 0.0748 max mem: 9377 +Train: [37] [5500/6250] eta: 0:01:54 lr: 0.000092 grad: 0.0729 (0.0802) loss: 0.8264 (0.8252) time: 0.1372 data: 0.0557 max mem: 9377 +Train: [37] [5600/6250] eta: 0:01:39 lr: 0.000092 grad: 0.0761 (0.0802) loss: 0.8276 (0.8252) time: 0.1542 data: 0.0697 max mem: 9377 +Train: [37] [5700/6250] eta: 0:01:24 lr: 0.000091 grad: 0.0761 (0.0802) loss: 0.8303 (0.8253) time: 0.1561 data: 0.0625 max mem: 9377 +Train: [37] [5800/6250] eta: 0:01:09 lr: 0.000091 grad: 0.0755 (0.0802) loss: 0.8273 (0.8254) time: 0.1503 data: 0.0675 max mem: 9377 +Train: [37] [5900/6250] eta: 0:00:53 lr: 0.000091 grad: 0.0825 (0.0803) loss: 0.8288 (0.8254) time: 0.1682 data: 0.0852 max mem: 9377 +Train: [37] [6000/6250] eta: 0:00:38 lr: 0.000091 grad: 0.0791 (0.0803) loss: 0.8246 (0.8254) time: 0.1505 data: 0.0723 max mem: 9377 +Train: [37] [6100/6250] eta: 0:00:23 lr: 0.000091 grad: 0.0793 (0.0803) loss: 0.8246 (0.8254) time: 0.1665 data: 0.0890 max mem: 9377 +Train: [37] [6200/6250] eta: 0:00:07 lr: 0.000091 grad: 0.0756 (0.0803) loss: 0.8288 (0.8254) time: 0.1507 data: 0.0693 max mem: 9377 +Train: [37] [6249/6250] eta: 0:00:00 lr: 0.000091 grad: 0.0727 (0.0803) loss: 0.8228 (0.8254) time: 0.1366 data: 0.0597 max mem: 9377 +Train: [37] Total time: 0:16:07 (0.1548 s / it) +Averaged stats: lr: 0.000091 grad: 0.0727 (0.0803) loss: 0.8228 (0.8254) +Eval (hcp-train-subset): [37] [ 0/62] eta: 0:03:55 loss: 0.8372 (0.8372) time: 3.7923 data: 3.6835 max mem: 9377 +Eval (hcp-train-subset): [37] [61/62] eta: 0:00:00 loss: 0.8347 (0.8358) time: 0.1224 data: 0.0971 max mem: 9377 +Eval (hcp-train-subset): [37] Total time: 0:00:14 (0.2327 s / it) +Averaged stats (hcp-train-subset): loss: 0.8347 (0.8358) +Eval (hcp-val): [37] [ 0/62] eta: 0:04:54 loss: 0.8338 (0.8338) time: 4.7450 data: 4.7134 max mem: 9377 +Eval (hcp-val): [37] [61/62] eta: 0:00:00 loss: 0.8375 (0.8385) time: 0.1349 data: 0.1083 max mem: 9377 +Eval (hcp-val): [37] Total time: 0:00:13 (0.2121 s / it) +Averaged stats (hcp-val): loss: 0.8375 (0.8385) +Eval (nsd-val): [37] [ 0/62] eta: 0:04:43 loss: 0.8006 (0.8006) time: 4.5659 data: 4.5323 max mem: 9377 +Eval (nsd-val): [37] [61/62] eta: 0:00:00 loss: 0.8105 (0.8112) time: 0.1173 data: 0.0903 max mem: 9377 +Eval (nsd-val): [37] Total time: 0:00:12 (0.2086 s / it) +Averaged stats (nsd-val): loss: 0.8105 (0.8112) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +Train: [38] [ 0/6250] eta: 10:11:20 lr: 0.000091 grad: 0.1911 (0.1911) loss: 0.8253 (0.8253) time: 5.8689 data: 5.7543 max mem: 9377 +Train: [38] [ 100/6250] eta: 0:19:59 lr: 0.000091 grad: 0.0774 (0.0943) loss: 0.8359 (0.8306) time: 0.1543 data: 0.0558 max mem: 9377 +Train: [38] [ 200/6250] eta: 0:17:13 lr: 0.000091 grad: 0.0811 (0.0874) loss: 0.8154 (0.8279) time: 0.1382 data: 0.0522 max mem: 9377 +Train: [38] [ 300/6250] eta: 0:16:00 lr: 0.000091 grad: 0.0770 (0.0846) loss: 0.8266 (0.8267) time: 0.1056 data: 0.0121 max mem: 9377 +Train: [38] [ 400/6250] eta: 0:15:14 lr: 0.000091 grad: 0.0774 (0.0832) loss: 0.8248 (0.8261) time: 0.1360 data: 0.0440 max mem: 9377 +Train: [38] [ 500/6250] eta: 0:14:41 lr: 0.000091 grad: 0.0716 (0.0822) loss: 0.8314 (0.8262) time: 0.1429 data: 0.0511 max mem: 9377 +Train: [38] [ 600/6250] eta: 0:14:18 lr: 0.000091 grad: 0.0763 (0.0813) loss: 0.8271 (0.8263) time: 0.1355 data: 0.0462 max mem: 9377 +Train: [38] [ 700/6250] eta: 0:13:58 lr: 0.000091 grad: 0.0744 (0.0809) loss: 0.8232 (0.8259) time: 0.1452 data: 0.0515 max mem: 9377 +Train: [38] [ 800/6250] eta: 0:13:42 lr: 0.000091 grad: 0.0750 (0.0807) loss: 0.8216 (0.8257) time: 0.1556 data: 0.0745 max mem: 9377 +Train: [38] [ 900/6250] eta: 0:13:27 lr: 0.000091 grad: 0.0822 (0.0806) loss: 0.8266 (0.8258) time: 0.1269 data: 0.0427 max mem: 9377 +Train: [38] [1000/6250] eta: 0:13:07 lr: 0.000091 grad: 0.0772 (0.0805) loss: 0.8305 (0.8261) time: 0.1308 data: 0.0452 max mem: 9377 +Train: [38] [1100/6250] eta: 0:12:50 lr: 0.000091 grad: 0.0740 (0.0806) loss: 0.8247 (0.8261) time: 0.1632 data: 0.0781 max mem: 9377 +Train: [38] [1200/6250] eta: 0:12:29 lr: 0.000091 grad: 0.0803 (0.0809) loss: 0.8246 (0.8259) time: 0.1373 data: 0.0519 max mem: 9377 +Train: [38] [1300/6250] eta: 0:12:13 lr: 0.000091 grad: 0.0778 (0.0808) loss: 0.8268 (0.8259) time: 0.1640 data: 0.0860 max mem: 9377 +Train: [38] [1400/6250] eta: 0:11:56 lr: 0.000091 grad: 0.0776 (0.0809) loss: 0.8233 (0.8257) time: 0.1401 data: 0.0573 max mem: 9377 +Train: [38] [1500/6250] eta: 0:11:39 lr: 0.000091 grad: 0.0744 (0.0809) loss: 0.8262 (0.8256) time: 0.1432 data: 0.0613 max mem: 9377 +Train: [38] [1600/6250] eta: 0:11:22 lr: 0.000091 grad: 0.0770 (0.0809) loss: 0.8294 (0.8256) time: 0.1409 data: 0.0541 max mem: 9377 +Train: [38] [1700/6250] eta: 0:11:03 lr: 0.000091 grad: 0.0784 (0.0808) loss: 0.8184 (0.8254) time: 0.1201 data: 0.0369 max mem: 9377 +Train: [38] [1800/6250] eta: 0:10:47 lr: 0.000091 grad: 0.0771 (0.0809) loss: 0.8221 (0.8254) time: 0.1322 data: 0.0516 max mem: 9377 +Train: [38] [1900/6250] eta: 0:10:32 lr: 0.000091 grad: 0.0735 (0.0808) loss: 0.8290 (0.8252) time: 0.1294 data: 0.0451 max mem: 9377 +Train: [38] [2000/6250] eta: 0:10:17 lr: 0.000091 grad: 0.0818 (0.0808) loss: 0.8198 (0.8253) time: 0.1290 data: 0.0389 max mem: 9377 +Train: [38] [2100/6250] eta: 0:10:01 lr: 0.000091 grad: 0.0816 (0.0809) loss: 0.8241 (0.8253) time: 0.1481 data: 0.0644 max mem: 9377 +Train: [38] [2200/6250] eta: 0:09:46 lr: 0.000091 grad: 0.0839 (0.0811) loss: 0.8199 (0.8251) time: 0.1424 data: 0.0548 max mem: 9377 +Train: [38] [2300/6250] eta: 0:09:32 lr: 0.000091 grad: 0.0821 (0.0812) loss: 0.8180 (0.8250) time: 0.1422 data: 0.0609 max mem: 9377 +Train: [38] [2400/6250] eta: 0:09:18 lr: 0.000091 grad: 0.0819 (0.0814) loss: 0.8183 (0.8248) time: 0.1603 data: 0.0768 max mem: 9377 +Train: [38] [2500/6250] eta: 0:09:05 lr: 0.000091 grad: 0.0859 (0.0816) loss: 0.8269 (0.8246) time: 0.1957 data: 0.1182 max mem: 9377 +Train: [38] [2600/6250] eta: 0:08:50 lr: 0.000091 grad: 0.0846 (0.0818) loss: 0.8131 (0.8244) time: 0.1286 data: 0.0431 max mem: 9377 +Train: [38] [2700/6250] eta: 0:08:36 lr: 0.000091 grad: 0.0812 (0.0820) loss: 0.8226 (0.8242) time: 0.1636 data: 0.0776 max mem: 9377 +Train: [38] [2800/6250] eta: 0:08:22 lr: 0.000091 grad: 0.0775 (0.0821) loss: 0.8222 (0.8242) time: 0.1642 data: 0.0876 max mem: 9377 +Train: [38] [2900/6250] eta: 0:08:09 lr: 0.000090 grad: 0.0763 (0.0821) loss: 0.8291 (0.8241) time: 0.1807 data: 0.1021 max mem: 9377 +Train: [38] [3000/6250] eta: 0:07:55 lr: 0.000090 grad: 0.0837 (0.0822) loss: 0.8164 (0.8240) time: 0.1352 data: 0.0517 max mem: 9377 +Train: [38] [3100/6250] eta: 0:07:42 lr: 0.000090 grad: 0.0772 (0.0823) loss: 0.8247 (0.8240) time: 0.1762 data: 0.0981 max mem: 9377 +Train: [38] [3200/6250] eta: 0:07:28 lr: 0.000090 grad: 0.0834 (0.0824) loss: 0.8204 (0.8238) time: 0.1231 data: 0.0393 max mem: 9377 +Train: [38] [3300/6250] eta: 0:07:14 lr: 0.000090 grad: 0.0800 (0.0825) loss: 0.8219 (0.8238) time: 0.1440 data: 0.0581 max mem: 9377 +Train: [38] [3400/6250] eta: 0:07:00 lr: 0.000090 grad: 0.0793 (0.0825) loss: 0.8206 (0.8237) time: 0.1590 data: 0.0807 max mem: 9377 +Train: [38] [3500/6250] eta: 0:06:47 lr: 0.000090 grad: 0.0781 (0.0826) loss: 0.8242 (0.8236) time: 0.2199 data: 0.1362 max mem: 9377 +Train: [38] [3600/6250] eta: 0:06:33 lr: 0.000090 grad: 0.0798 (0.0826) loss: 0.8178 (0.8235) time: 0.1663 data: 0.0832 max mem: 9377 +Train: [38] [3700/6250] eta: 0:06:19 lr: 0.000090 grad: 0.0767 (0.0826) loss: 0.8226 (0.8235) time: 0.1619 data: 0.0829 max mem: 9377 +Train: [38] [3800/6250] eta: 0:06:05 lr: 0.000090 grad: 0.0769 (0.0826) loss: 0.8239 (0.8234) time: 0.1568 data: 0.0780 max mem: 9377 +Train: [38] [3900/6250] eta: 0:05:51 lr: 0.000090 grad: 0.0818 (0.0826) loss: 0.8147 (0.8233) time: 0.1650 data: 0.0776 max mem: 9377 +Train: [38] [4000/6250] eta: 0:05:37 lr: 0.000090 grad: 0.0804 (0.0826) loss: 0.8243 (0.8233) time: 0.1635 data: 0.0753 max mem: 9377 +Train: [38] [4100/6250] eta: 0:05:22 lr: 0.000090 grad: 0.0795 (0.0826) loss: 0.8204 (0.8232) time: 0.1301 data: 0.0468 max mem: 9377 +Train: [38] [4200/6250] eta: 0:05:06 lr: 0.000090 grad: 0.0813 (0.0827) loss: 0.8261 (0.8231) time: 0.1472 data: 0.0621 max mem: 9377 +Train: [38] [4300/6250] eta: 0:04:51 lr: 0.000090 grad: 0.0812 (0.0827) loss: 0.8243 (0.8230) time: 0.1239 data: 0.0373 max mem: 9377 +Train: [38] [4400/6250] eta: 0:04:35 lr: 0.000090 grad: 0.0845 (0.0827) loss: 0.8215 (0.8230) time: 0.1250 data: 0.0439 max mem: 9377 +Train: [38] [4500/6250] eta: 0:04:20 lr: 0.000090 grad: 0.0799 (0.0827) loss: 0.8195 (0.8230) time: 0.1452 data: 0.0599 max mem: 9377 +Train: [38] [4600/6250] eta: 0:04:05 lr: 0.000090 grad: 0.0802 (0.0827) loss: 0.8266 (0.8230) time: 0.1536 data: 0.0687 max mem: 9377 +Train: [38] [4700/6250] eta: 0:03:51 lr: 0.000090 grad: 0.0812 (0.0827) loss: 0.8206 (0.8230) time: 0.1517 data: 0.0673 max mem: 9377 +Train: [38] [4800/6250] eta: 0:03:36 lr: 0.000090 grad: 0.0816 (0.0827) loss: 0.8252 (0.8230) time: 0.1593 data: 0.0760 max mem: 9377 +Train: [38] [4900/6250] eta: 0:03:21 lr: 0.000090 grad: 0.0752 (0.0827) loss: 0.8291 (0.8231) time: 0.1431 data: 0.0643 max mem: 9377 +Train: [38] [5000/6250] eta: 0:03:07 lr: 0.000090 grad: 0.0750 (0.0826) loss: 0.8278 (0.8232) time: 0.1655 data: 0.0881 max mem: 9377 +Train: [38] [5100/6250] eta: 0:02:52 lr: 0.000090 grad: 0.0771 (0.0826) loss: 0.8213 (0.8232) time: 0.1863 data: 0.1074 max mem: 9377 +Train: [38] [5200/6250] eta: 0:02:37 lr: 0.000090 grad: 0.0754 (0.0826) loss: 0.8231 (0.8232) time: 0.1631 data: 0.0727 max mem: 9377 +Train: [38] [5300/6250] eta: 0:02:22 lr: 0.000090 grad: 0.0844 (0.0826) loss: 0.8247 (0.8232) time: 0.1296 data: 0.0477 max mem: 9377 +Train: [38] [5400/6250] eta: 0:02:07 lr: 0.000090 grad: 0.0841 (0.0826) loss: 0.8252 (0.8232) time: 0.1689 data: 0.0897 max mem: 9377 +Train: [38] [5500/6250] eta: 0:01:52 lr: 0.000090 grad: 0.0752 (0.0825) loss: 0.8192 (0.8232) time: 0.1453 data: 0.0645 max mem: 9377 +Train: [38] [5600/6250] eta: 0:01:37 lr: 0.000090 grad: 0.0732 (0.0825) loss: 0.8184 (0.8232) time: 0.1403 data: 0.0526 max mem: 9377 +Train: [38] [5700/6250] eta: 0:01:22 lr: 0.000090 grad: 0.0816 (0.0825) loss: 0.8245 (0.8232) time: 0.1265 data: 0.0437 max mem: 9377 +Train: [38] [5800/6250] eta: 0:01:07 lr: 0.000090 grad: 0.0803 (0.0825) loss: 0.8182 (0.8231) time: 0.1483 data: 0.0670 max mem: 9377 +Train: [38] [5900/6250] eta: 0:00:52 lr: 0.000090 grad: 0.0817 (0.0825) loss: 0.8221 (0.8231) time: 0.1390 data: 0.0529 max mem: 9377 +Train: [38] [6000/6250] eta: 0:00:37 lr: 0.000090 grad: 0.0822 (0.0825) loss: 0.8147 (0.8230) time: 0.1146 data: 0.0272 max mem: 9377 +Train: [38] [6100/6250] eta: 0:00:22 lr: 0.000090 grad: 0.0783 (0.0826) loss: 0.8202 (0.8230) time: 0.1533 data: 0.0560 max mem: 9377 +Train: [38] [6200/6250] eta: 0:00:07 lr: 0.000089 grad: 0.0818 (0.0826) loss: 0.8152 (0.8229) time: 0.1394 data: 0.0422 max mem: 9377 +Train: [38] [6249/6250] eta: 0:00:00 lr: 0.000089 grad: 0.0840 (0.0826) loss: 0.8151 (0.8229) time: 0.1431 data: 0.0510 max mem: 9377 +Train: [38] Total time: 0:15:36 (0.1498 s / it) +Averaged stats: lr: 0.000089 grad: 0.0840 (0.0826) loss: 0.8151 (0.8229) +Eval (hcp-train-subset): [38] [ 0/62] eta: 0:06:34 loss: 0.8345 (0.8345) time: 6.3650 data: 6.3319 max mem: 9377 +Eval (hcp-train-subset): [38] [61/62] eta: 0:00:00 loss: 0.8326 (0.8351) time: 0.1175 data: 0.0916 max mem: 9377 +Eval (hcp-train-subset): [38] Total time: 0:00:14 (0.2376 s / it) +Averaged stats (hcp-train-subset): loss: 0.8326 (0.8351) +Eval (hcp-val): [38] [ 0/62] eta: 0:06:28 loss: 0.8352 (0.8352) time: 6.2720 data: 6.2401 max mem: 9377 +Eval (hcp-val): [38] [61/62] eta: 0:00:00 loss: 0.8357 (0.8373) time: 0.1418 data: 0.1167 max mem: 9377 +Eval (hcp-val): [38] Total time: 0:00:14 (0.2299 s / it) +Averaged stats (hcp-val): loss: 0.8357 (0.8373) +Eval (nsd-val): [38] [ 0/62] eta: 0:05:46 loss: 0.7960 (0.7960) time: 5.5819 data: 5.5508 max mem: 9377 +Eval (nsd-val): [38] [61/62] eta: 0:00:00 loss: 0.8078 (0.8094) time: 0.1585 data: 0.1332 max mem: 9377 +Eval (nsd-val): [38] Total time: 0:00:14 (0.2341 s / it) +Averaged stats (nsd-val): loss: 0.8078 (0.8094) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +Train: [39] [ 0/6250] eta: 10:49:25 lr: 0.000089 grad: 0.1486 (0.1486) loss: 0.7992 (0.7992) time: 6.2345 data: 6.1051 max mem: 9377 +Train: [39] [ 100/6250] eta: 0:22:08 lr: 0.000089 grad: 0.0843 (0.1025) loss: 0.8217 (0.8289) time: 0.1598 data: 0.0620 max mem: 9377 +Train: [39] [ 200/6250] eta: 0:18:54 lr: 0.000089 grad: 0.0783 (0.0976) loss: 0.8213 (0.8242) time: 0.1597 data: 0.0573 max mem: 9377 +Train: [39] [ 300/6250] eta: 0:17:46 lr: 0.000089 grad: 0.0778 (0.0969) loss: 0.8239 (0.8236) time: 0.1504 data: 0.0509 max mem: 9377 +Train: [39] [ 400/6250] eta: 0:16:53 lr: 0.000089 grad: 0.0730 (0.0926) loss: 0.8274 (0.8242) time: 0.1540 data: 0.0624 max mem: 9377 +Train: [39] [ 500/6250] eta: 0:16:08 lr: 0.000089 grad: 0.0752 (0.0901) loss: 0.8194 (0.8250) time: 0.1459 data: 0.0494 max mem: 9377 +Train: [39] [ 600/6250] eta: 0:15:34 lr: 0.000089 grad: 0.0754 (0.0879) loss: 0.8247 (0.8254) time: 0.1639 data: 0.0721 max mem: 9377 +Train: [39] [ 700/6250] eta: 0:15:08 lr: 0.000089 grad: 0.0769 (0.0867) loss: 0.8221 (0.8252) time: 0.1532 data: 0.0717 max mem: 9377 +Train: [39] [ 800/6250] eta: 0:14:44 lr: 0.000089 grad: 0.0747 (0.0857) loss: 0.8207 (0.8250) time: 0.1490 data: 0.0610 max mem: 9377 +Train: [39] [ 900/6250] eta: 0:14:23 lr: 0.000089 grad: 0.0783 (0.0851) loss: 0.8308 (0.8250) time: 0.1533 data: 0.0707 max mem: 9377 +Train: [39] [1000/6250] eta: 0:14:00 lr: 0.000089 grad: 0.0748 (0.0845) loss: 0.8326 (0.8249) time: 0.1539 data: 0.0658 max mem: 9377 +Train: [39] [1100/6250] eta: 0:13:38 lr: 0.000089 grad: 0.0789 (0.0843) loss: 0.8198 (0.8247) time: 0.1443 data: 0.0573 max mem: 9377 +Train: [39] [1200/6250] eta: 0:13:16 lr: 0.000089 grad: 0.0718 (0.0838) loss: 0.8317 (0.8248) time: 0.1438 data: 0.0644 max mem: 9377 +Train: [39] [1300/6250] eta: 0:12:56 lr: 0.000089 grad: 0.0791 (0.0834) loss: 0.8254 (0.8249) time: 0.1542 data: 0.0736 max mem: 9377 +Train: [39] [1400/6250] eta: 0:12:37 lr: 0.000089 grad: 0.0750 (0.0831) loss: 0.8297 (0.8250) time: 0.1415 data: 0.0588 max mem: 9377 +Train: [39] [1500/6250] eta: 0:12:18 lr: 0.000089 grad: 0.0792 (0.0829) loss: 0.8254 (0.8251) time: 0.1417 data: 0.0594 max mem: 9377 +Train: [39] [1600/6250] eta: 0:11:58 lr: 0.000089 grad: 0.0798 (0.0830) loss: 0.8166 (0.8250) time: 0.1510 data: 0.0642 max mem: 9377 +Train: [39] [1700/6250] eta: 0:11:39 lr: 0.000089 grad: 0.0778 (0.0829) loss: 0.8235 (0.8248) time: 0.1480 data: 0.0629 max mem: 9377 +Train: [39] [1800/6250] eta: 0:11:20 lr: 0.000089 grad: 0.0791 (0.0828) loss: 0.8225 (0.8248) time: 0.1429 data: 0.0666 max mem: 9377 +Train: [39] [1900/6250] eta: 0:11:01 lr: 0.000089 grad: 0.0814 (0.0828) loss: 0.8237 (0.8247) time: 0.1272 data: 0.0461 max mem: 9377 +Train: [39] [2000/6250] eta: 0:10:44 lr: 0.000089 grad: 0.0803 (0.0827) loss: 0.8272 (0.8247) time: 0.1178 data: 0.0316 max mem: 9377 +Train: [39] [2100/6250] eta: 0:10:27 lr: 0.000089 grad: 0.0799 (0.0829) loss: 0.8215 (0.8246) time: 0.1317 data: 0.0459 max mem: 9377 +Train: [39] [2200/6250] eta: 0:10:10 lr: 0.000089 grad: 0.0730 (0.0826) loss: 0.8242 (0.8246) time: 0.1502 data: 0.0692 max mem: 9377 +Train: [39] [2300/6250] eta: 0:09:52 lr: 0.000089 grad: 0.0784 (0.0827) loss: 0.8235 (0.8244) time: 0.1247 data: 0.0421 max mem: 9377 +Train: [39] [2400/6250] eta: 0:09:35 lr: 0.000089 grad: 0.0757 (0.0826) loss: 0.8321 (0.8245) time: 0.1442 data: 0.0599 max mem: 9377 +Train: [39] [2500/6250] eta: 0:09:20 lr: 0.000089 grad: 0.0755 (0.0825) loss: 0.8232 (0.8246) time: 0.1385 data: 0.0589 max mem: 9377 +Train: [39] [2600/6250] eta: 0:09:03 lr: 0.000089 grad: 0.0783 (0.0824) loss: 0.8231 (0.8246) time: 0.1478 data: 0.0697 max mem: 9377 +Train: [39] [2700/6250] eta: 0:08:48 lr: 0.000089 grad: 0.0758 (0.0823) loss: 0.8222 (0.8246) time: 0.1295 data: 0.0464 max mem: 9377 +Train: [39] [2800/6250] eta: 0:08:32 lr: 0.000089 grad: 0.0781 (0.0822) loss: 0.8296 (0.8247) time: 0.1632 data: 0.0820 max mem: 9377 +Train: [39] [2900/6250] eta: 0:08:16 lr: 0.000089 grad: 0.0776 (0.0821) loss: 0.8298 (0.8247) time: 0.1621 data: 0.0803 max mem: 9377 +Train: [39] [3000/6250] eta: 0:08:01 lr: 0.000089 grad: 0.0804 (0.0819) loss: 0.8262 (0.8248) time: 0.1397 data: 0.0618 max mem: 9377 +Train: [39] [3100/6250] eta: 0:07:46 lr: 0.000089 grad: 0.0760 (0.0818) loss: 0.8310 (0.8249) time: 0.1578 data: 0.0781 max mem: 9377 +Train: [39] [3200/6250] eta: 0:07:30 lr: 0.000089 grad: 0.0768 (0.0818) loss: 0.8272 (0.8250) time: 0.1503 data: 0.0659 max mem: 9377 +Train: [39] [3300/6250] eta: 0:07:16 lr: 0.000088 grad: 0.0757 (0.0817) loss: 0.8309 (0.8250) time: 0.1470 data: 0.0676 max mem: 9377 +Train: [39] [3400/6250] eta: 0:07:02 lr: 0.000088 grad: 0.0812 (0.0817) loss: 0.8212 (0.8250) time: 0.1559 data: 0.0683 max mem: 9377 +Train: [39] [3500/6250] eta: 0:06:47 lr: 0.000088 grad: 0.0839 (0.0817) loss: 0.8281 (0.8251) time: 0.1406 data: 0.0611 max mem: 9377 +Train: [39] [3600/6250] eta: 0:06:32 lr: 0.000088 grad: 0.0849 (0.0818) loss: 0.8202 (0.8250) time: 0.1435 data: 0.0598 max mem: 9377 +Train: [39] [3700/6250] eta: 0:06:17 lr: 0.000088 grad: 0.0788 (0.0819) loss: 0.8249 (0.8250) time: 0.1461 data: 0.0693 max mem: 9377 +Train: [39] [3800/6250] eta: 0:06:02 lr: 0.000088 grad: 0.0878 (0.0820) loss: 0.8187 (0.8249) time: 0.1594 data: 0.0803 max mem: 9377 +Train: [39] [3900/6250] eta: 0:05:47 lr: 0.000088 grad: 0.0840 (0.0820) loss: 0.8287 (0.8249) time: 0.1611 data: 0.0749 max mem: 9377 +Train: [39] [4000/6250] eta: 0:05:31 lr: 0.000088 grad: 0.0807 (0.0820) loss: 0.8286 (0.8249) time: 0.1209 data: 0.0370 max mem: 9377 +Train: [39] [4100/6250] eta: 0:05:16 lr: 0.000088 grad: 0.0795 (0.0820) loss: 0.8214 (0.8248) time: 0.1166 data: 0.0291 max mem: 9377 +Train: [39] [4200/6250] eta: 0:05:01 lr: 0.000088 grad: 0.0873 (0.0821) loss: 0.8186 (0.8248) time: 0.1434 data: 0.0557 max mem: 9377 +Train: [39] [4300/6250] eta: 0:04:46 lr: 0.000088 grad: 0.0900 (0.0822) loss: 0.8244 (0.8247) time: 0.1390 data: 0.0535 max mem: 9377 +Train: [39] [4400/6250] eta: 0:04:31 lr: 0.000088 grad: 0.0815 (0.0823) loss: 0.8245 (0.8246) time: 0.1296 data: 0.0395 max mem: 9377 +Train: [39] [4500/6250] eta: 0:04:16 lr: 0.000088 grad: 0.0753 (0.0823) loss: 0.8234 (0.8246) time: 0.1260 data: 0.0354 max mem: 9377 +Train: [39] [4600/6250] eta: 0:04:01 lr: 0.000088 grad: 0.0852 (0.0824) loss: 0.8199 (0.8246) time: 0.1432 data: 0.0645 max mem: 9377 +Train: [39] [4700/6250] eta: 0:03:46 lr: 0.000088 grad: 0.0790 (0.0825) loss: 0.8214 (0.8246) time: 0.1433 data: 0.0616 max mem: 9377 +Train: [39] [4800/6250] eta: 0:03:31 lr: 0.000088 grad: 0.0766 (0.0825) loss: 0.8232 (0.8245) time: 0.1348 data: 0.0451 max mem: 9377 +Train: [39] [4900/6250] eta: 0:03:17 lr: 0.000088 grad: 0.0846 (0.0826) loss: 0.8248 (0.8245) time: 0.1824 data: 0.1007 max mem: 9377 +Train: [39] [5000/6250] eta: 0:03:02 lr: 0.000088 grad: 0.0820 (0.0827) loss: 0.8205 (0.8244) time: 0.1610 data: 0.0848 max mem: 9377 +Train: [39] [5100/6250] eta: 0:02:48 lr: 0.000088 grad: 0.0790 (0.0827) loss: 0.8268 (0.8244) time: 0.1460 data: 0.0604 max mem: 9377 +Train: [39] [5200/6250] eta: 0:02:33 lr: 0.000088 grad: 0.0795 (0.0827) loss: 0.8260 (0.8243) time: 0.1697 data: 0.0897 max mem: 9377 +Train: [39] [5300/6250] eta: 0:02:19 lr: 0.000088 grad: 0.0804 (0.0826) loss: 0.8239 (0.8243) time: 0.1456 data: 0.0612 max mem: 9377 +Train: [39] [5400/6250] eta: 0:02:04 lr: 0.000088 grad: 0.0797 (0.0826) loss: 0.8228 (0.8243) time: 0.1420 data: 0.0641 max mem: 9377 +Train: [39] [5500/6250] eta: 0:01:49 lr: 0.000088 grad: 0.0768 (0.0826) loss: 0.8280 (0.8243) time: 0.1579 data: 0.0726 max mem: 9377 +Train: [39] [5600/6250] eta: 0:01:35 lr: 0.000088 grad: 0.0839 (0.0827) loss: 0.8174 (0.8243) time: 0.1259 data: 0.0433 max mem: 9377 +Train: [39] [5700/6250] eta: 0:01:20 lr: 0.000088 grad: 0.0789 (0.0827) loss: 0.8241 (0.8243) time: 0.1357 data: 0.0469 max mem: 9377 +Train: [39] [5800/6250] eta: 0:01:05 lr: 0.000088 grad: 0.0804 (0.0827) loss: 0.8174 (0.8242) time: 0.1405 data: 0.0594 max mem: 9377 +Train: [39] [5900/6250] eta: 0:00:50 lr: 0.000088 grad: 0.0771 (0.0827) loss: 0.8204 (0.8242) time: 0.1395 data: 0.0596 max mem: 9377 +Train: [39] [6000/6250] eta: 0:00:36 lr: 0.000088 grad: 0.0747 (0.0827) loss: 0.8302 (0.8241) time: 0.1743 data: 0.0915 max mem: 9377 +Train: [39] [6100/6250] eta: 0:00:21 lr: 0.000088 grad: 0.0776 (0.0827) loss: 0.8290 (0.8241) time: 0.1629 data: 0.0810 max mem: 9377 +Train: [39] [6200/6250] eta: 0:00:07 lr: 0.000088 grad: 0.0843 (0.0828) loss: 0.8302 (0.8241) time: 0.1669 data: 0.0875 max mem: 9377 +Train: [39] [6249/6250] eta: 0:00:00 lr: 0.000088 grad: 0.0801 (0.0828) loss: 0.8245 (0.8241) time: 0.1724 data: 0.0956 max mem: 9377 +Train: [39] Total time: 0:15:20 (0.1474 s / it) +Averaged stats: lr: 0.000088 grad: 0.0801 (0.0828) loss: 0.8245 (0.8241) +Eval (hcp-train-subset): [39] [ 0/62] eta: 0:04:05 loss: 0.8363 (0.8363) time: 3.9667 data: 3.8752 max mem: 9377 +Eval (hcp-train-subset): [39] [61/62] eta: 0:00:00 loss: 0.8323 (0.8343) time: 0.1246 data: 0.0992 max mem: 9377 +Eval (hcp-train-subset): [39] Total time: 0:00:14 (0.2277 s / it) +Averaged stats (hcp-train-subset): loss: 0.8323 (0.8343) +Making plots (hcp-train-subset): example=18 +Eval (hcp-val): [39] [ 0/62] eta: 0:04:38 loss: 0.8324 (0.8324) time: 4.4970 data: 4.4652 max mem: 9377 +Eval (hcp-val): [39] [61/62] eta: 0:00:00 loss: 0.8357 (0.8374) time: 0.1286 data: 0.1013 max mem: 9377 +Eval (hcp-val): [39] Total time: 0:00:13 (0.2171 s / it) +Averaged stats (hcp-val): loss: 0.8357 (0.8374) +Making plots (hcp-val): example=55 +Eval (nsd-val): [39] [ 0/62] eta: 0:03:19 loss: 0.8000 (0.8000) time: 3.2124 data: 3.1447 max mem: 9377 +Eval (nsd-val): [39] [61/62] eta: 0:00:00 loss: 0.8094 (0.8092) time: 0.1302 data: 0.1033 max mem: 9377 +Eval (nsd-val): [39] Total time: 0:00:12 (0.2050 s / it) +Averaged stats (nsd-val): loss: 0.8094 (0.8092) +Making plots (nsd-val): example=49 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-00039.pth +Train: [40] [ 0/6250] eta: 8:54:55 lr: 0.000088 grad: 0.1978 (0.1978) loss: 0.8274 (0.8274) time: 5.1353 data: 4.9421 max mem: 9377 +Train: [40] [ 100/6250] eta: 0:21:02 lr: 0.000088 grad: 0.0730 (0.0810) loss: 0.8403 (0.8462) time: 0.1500 data: 0.0507 max mem: 9377 +Train: [40] [ 200/6250] eta: 0:18:43 lr: 0.000088 grad: 0.0689 (0.0875) loss: 0.8357 (0.8402) time: 0.1781 data: 0.0809 max mem: 9377 +Train: [40] [ 300/6250] eta: 0:17:23 lr: 0.000088 grad: 0.0842 (0.0868) loss: 0.8318 (0.8363) time: 0.1542 data: 0.0615 max mem: 9377 +Train: [40] [ 400/6250] eta: 0:16:32 lr: 0.000087 grad: 0.0780 (0.0865) loss: 0.8231 (0.8335) time: 0.1327 data: 0.0411 max mem: 9377 +Train: [40] [ 500/6250] eta: 0:15:55 lr: 0.000087 grad: 0.0877 (0.0859) loss: 0.8207 (0.8319) time: 0.1575 data: 0.0671 max mem: 9377 +Train: [40] [ 600/6250] eta: 0:15:18 lr: 0.000087 grad: 0.0866 (0.0855) loss: 0.8157 (0.8305) time: 0.1558 data: 0.0726 max mem: 9377 +Train: [40] [ 700/6250] eta: 0:14:51 lr: 0.000087 grad: 0.0751 (0.0853) loss: 0.8237 (0.8296) time: 0.1699 data: 0.0864 max mem: 9377 +Train: [40] [ 800/6250] eta: 0:14:26 lr: 0.000087 grad: 0.0773 (0.0846) loss: 0.8279 (0.8289) time: 0.1637 data: 0.0807 max mem: 9377 +Train: [40] [ 900/6250] eta: 0:14:06 lr: 0.000087 grad: 0.0790 (0.0840) loss: 0.8247 (0.8286) time: 0.1656 data: 0.0759 max mem: 9377 +Train: [40] [1000/6250] eta: 0:13:40 lr: 0.000087 grad: 0.0767 (0.0835) loss: 0.8232 (0.8282) time: 0.1356 data: 0.0516 max mem: 9377 +Train: [40] [1100/6250] eta: 0:13:19 lr: 0.000087 grad: 0.0792 (0.0829) loss: 0.8198 (0.8280) time: 0.1468 data: 0.0707 max mem: 9377 +Train: [40] [1200/6250] eta: 0:13:01 lr: 0.000087 grad: 0.0776 (0.0827) loss: 0.8253 (0.8277) time: 0.1704 data: 0.0923 max mem: 9377 +Train: [40] [1300/6250] eta: 0:12:40 lr: 0.000087 grad: 0.0785 (0.0826) loss: 0.8199 (0.8274) time: 0.1519 data: 0.0712 max mem: 9377 +Train: [40] [1400/6250] eta: 0:12:23 lr: 0.000087 grad: 0.0794 (0.0824) loss: 0.8185 (0.8271) time: 0.1174 data: 0.0365 max mem: 9377 +Train: [40] [1500/6250] eta: 0:12:01 lr: 0.000087 grad: 0.0788 (0.0823) loss: 0.8220 (0.8269) time: 0.1365 data: 0.0502 max mem: 9377 +Train: [40] [1600/6250] eta: 0:11:42 lr: 0.000087 grad: 0.0733 (0.0822) loss: 0.8250 (0.8266) time: 0.1315 data: 0.0505 max mem: 9377 +Train: [40] [1700/6250] eta: 0:11:26 lr: 0.000087 grad: 0.0786 (0.0822) loss: 0.8245 (0.8264) time: 0.1505 data: 0.0661 max mem: 9377 +Train: [40] [1800/6250] eta: 0:11:09 lr: 0.000087 grad: 0.0816 (0.0821) loss: 0.8280 (0.8262) time: 0.1628 data: 0.0819 max mem: 9377 +Train: [40] [1900/6250] eta: 0:10:53 lr: 0.000087 grad: 0.0768 (0.0821) loss: 0.8237 (0.8260) time: 0.1338 data: 0.0495 max mem: 9377 +Train: [40] [2000/6250] eta: 0:10:35 lr: 0.000087 grad: 0.0829 (0.0822) loss: 0.8239 (0.8256) time: 0.1296 data: 0.0492 max mem: 9377 +Train: [40] [2100/6250] eta: 0:10:17 lr: 0.000087 grad: 0.0783 (0.0822) loss: 0.8217 (0.8253) time: 0.1165 data: 0.0322 max mem: 9377 +Train: [40] [2200/6250] eta: 0:09:59 lr: 0.000087 grad: 0.0813 (0.0822) loss: 0.8201 (0.8250) time: 0.1335 data: 0.0503 max mem: 9377 +Train: [40] [2300/6250] eta: 0:09:43 lr: 0.000087 grad: 0.0820 (0.0823) loss: 0.8225 (0.8248) time: 0.1456 data: 0.0649 max mem: 9377 +Train: [40] [2400/6250] eta: 0:09:27 lr: 0.000087 grad: 0.0785 (0.0823) loss: 0.8226 (0.8246) time: 0.1084 data: 0.0215 max mem: 9377 +Train: [40] [2500/6250] eta: 0:09:11 lr: 0.000087 grad: 0.0794 (0.0824) loss: 0.8235 (0.8245) time: 0.1238 data: 0.0400 max mem: 9377 +Train: [40] [2600/6250] eta: 0:08:55 lr: 0.000087 grad: 0.0799 (0.0825) loss: 0.8161 (0.8242) time: 0.1284 data: 0.0447 max mem: 9377 +Train: [40] [2700/6250] eta: 0:08:40 lr: 0.000087 grad: 0.0827 (0.0826) loss: 0.8223 (0.8240) time: 0.1369 data: 0.0547 max mem: 9377 +Train: [40] [2800/6250] eta: 0:08:24 lr: 0.000087 grad: 0.0828 (0.0826) loss: 0.8203 (0.8238) time: 0.1580 data: 0.0796 max mem: 9377 +Train: [40] [2900/6250] eta: 0:08:08 lr: 0.000087 grad: 0.0847 (0.0827) loss: 0.8201 (0.8237) time: 0.1364 data: 0.0506 max mem: 9377 +Train: [40] [3000/6250] eta: 0:07:54 lr: 0.000087 grad: 0.0784 (0.0827) loss: 0.8194 (0.8235) time: 0.1636 data: 0.0810 max mem: 9377 +Train: [40] [3100/6250] eta: 0:07:42 lr: 0.000087 grad: 0.0873 (0.0828) loss: 0.8220 (0.8234) time: 0.1790 data: 0.0843 max mem: 9377 +Train: [40] [3200/6250] eta: 0:07:27 lr: 0.000087 grad: 0.0863 (0.0829) loss: 0.8191 (0.8233) time: 0.1417 data: 0.0513 max mem: 9377 +Train: [40] [3300/6250] eta: 0:07:13 lr: 0.000087 grad: 0.0817 (0.0830) loss: 0.8186 (0.8232) time: 0.1571 data: 0.0768 max mem: 9377 +Train: [40] [3400/6250] eta: 0:06:58 lr: 0.000087 grad: 0.0802 (0.0830) loss: 0.8205 (0.8231) time: 0.1567 data: 0.0746 max mem: 9377 +Train: [40] [3500/6250] eta: 0:06:43 lr: 0.000087 grad: 0.0825 (0.0830) loss: 0.8245 (0.8230) time: 0.1320 data: 0.0471 max mem: 9377 +Train: [40] [3600/6250] eta: 0:06:28 lr: 0.000087 grad: 0.0788 (0.0830) loss: 0.8195 (0.8230) time: 0.1355 data: 0.0456 max mem: 9377 +Train: [40] [3700/6250] eta: 0:06:13 lr: 0.000086 grad: 0.0862 (0.0830) loss: 0.8245 (0.8230) time: 0.1234 data: 0.0398 max mem: 9377 +Train: [40] [3800/6250] eta: 0:05:58 lr: 0.000086 grad: 0.0846 (0.0830) loss: 0.8230 (0.8229) time: 0.1415 data: 0.0549 max mem: 9377 +Train: [40] [3900/6250] eta: 0:05:43 lr: 0.000086 grad: 0.0861 (0.0831) loss: 0.8167 (0.8228) time: 0.1317 data: 0.0450 max mem: 9377 +Train: [40] [4000/6250] eta: 0:05:28 lr: 0.000086 grad: 0.0815 (0.0831) loss: 0.8252 (0.8228) time: 0.1503 data: 0.0671 max mem: 9377 +Train: [40] [4100/6250] eta: 0:05:12 lr: 0.000086 grad: 0.0844 (0.0831) loss: 0.8249 (0.8228) time: 0.1181 data: 0.0359 max mem: 9377 +Train: [40] [4200/6250] eta: 0:04:57 lr: 0.000086 grad: 0.0820 (0.0832) loss: 0.8181 (0.8227) time: 0.1306 data: 0.0463 max mem: 9377 +Train: [40] [4300/6250] eta: 0:04:42 lr: 0.000086 grad: 0.0839 (0.0832) loss: 0.8238 (0.8226) time: 0.1438 data: 0.0554 max mem: 9377 +Train: [40] [4400/6250] eta: 0:04:28 lr: 0.000086 grad: 0.0799 (0.0832) loss: 0.8220 (0.8226) time: 0.1561 data: 0.0765 max mem: 9377 +Train: [40] [4500/6250] eta: 0:04:13 lr: 0.000086 grad: 0.0789 (0.0832) loss: 0.8242 (0.8226) time: 0.1542 data: 0.0741 max mem: 9377 +Train: [40] [4600/6250] eta: 0:03:59 lr: 0.000086 grad: 0.0869 (0.0832) loss: 0.8162 (0.8225) time: 0.1481 data: 0.0634 max mem: 9377 +Train: [40] [4700/6250] eta: 0:03:45 lr: 0.000086 grad: 0.0843 (0.0833) loss: 0.8163 (0.8225) time: 0.1367 data: 0.0547 max mem: 9377 +Train: [40] [4800/6250] eta: 0:03:31 lr: 0.000086 grad: 0.0803 (0.0834) loss: 0.8209 (0.8224) time: 0.1333 data: 0.0553 max mem: 9377 +Train: [40] [4900/6250] eta: 0:03:16 lr: 0.000086 grad: 0.0825 (0.0834) loss: 0.8254 (0.8224) time: 0.1522 data: 0.0751 max mem: 9377 +Train: [40] [5000/6250] eta: 0:03:02 lr: 0.000086 grad: 0.0808 (0.0834) loss: 0.8207 (0.8223) time: 0.1602 data: 0.0806 max mem: 9377 +Train: [40] [5100/6250] eta: 0:02:47 lr: 0.000086 grad: 0.0840 (0.0835) loss: 0.8194 (0.8222) time: 0.1474 data: 0.0645 max mem: 9377 +Train: [40] [5200/6250] eta: 0:02:33 lr: 0.000086 grad: 0.0836 (0.0835) loss: 0.8185 (0.8222) time: 0.1608 data: 0.0742 max mem: 9377 +Train: [40] [5300/6250] eta: 0:02:18 lr: 0.000086 grad: 0.0889 (0.0836) loss: 0.8166 (0.8222) time: 0.1398 data: 0.0559 max mem: 9377 +Train: [40] [5400/6250] eta: 0:02:04 lr: 0.000086 grad: 0.0818 (0.0837) loss: 0.8209 (0.8221) time: 0.1487 data: 0.0650 max mem: 9377 +Train: [40] [5500/6250] eta: 0:01:49 lr: 0.000086 grad: 0.0816 (0.0837) loss: 0.8218 (0.8221) time: 0.1457 data: 0.0681 max mem: 9377 +Train: [40] [5600/6250] eta: 0:01:34 lr: 0.000086 grad: 0.0801 (0.0837) loss: 0.8194 (0.8221) time: 0.0906 data: 0.0046 max mem: 9377 +Train: [40] [5700/6250] eta: 0:01:20 lr: 0.000086 grad: 0.0831 (0.0837) loss: 0.8222 (0.8220) time: 0.1795 data: 0.0987 max mem: 9377 +Train: [40] [5800/6250] eta: 0:01:05 lr: 0.000086 grad: 0.0774 (0.0837) loss: 0.8293 (0.8221) time: 0.1407 data: 0.0613 max mem: 9377 +Train: [40] [5900/6250] eta: 0:00:51 lr: 0.000086 grad: 0.0901 (0.0837) loss: 0.8153 (0.8221) time: 0.1355 data: 0.0537 max mem: 9377 +Train: [40] [6000/6250] eta: 0:00:36 lr: 0.000086 grad: 0.0825 (0.0838) loss: 0.8249 (0.8220) time: 0.1422 data: 0.0621 max mem: 9377 +Train: [40] [6100/6250] eta: 0:00:21 lr: 0.000086 grad: 0.0796 (0.0838) loss: 0.8198 (0.8220) time: 0.1916 data: 0.1084 max mem: 9377 +Train: [40] [6200/6250] eta: 0:00:07 lr: 0.000086 grad: 0.0849 (0.0839) loss: 0.8201 (0.8220) time: 0.1354 data: 0.0529 max mem: 9377 +Train: [40] [6249/6250] eta: 0:00:00 lr: 0.000086 grad: 0.0823 (0.0839) loss: 0.8204 (0.8220) time: 0.1471 data: 0.0680 max mem: 9377 +Train: [40] Total time: 0:15:18 (0.1469 s / it) +Averaged stats: lr: 0.000086 grad: 0.0823 (0.0839) loss: 0.8204 (0.8220) +Eval (hcp-train-subset): [40] [ 0/62] eta: 0:05:22 loss: 0.8346 (0.8346) time: 5.1961 data: 5.1636 max mem: 9377 +Eval (hcp-train-subset): [40] [61/62] eta: 0:00:00 loss: 0.8338 (0.8348) time: 0.1381 data: 0.1126 max mem: 9377 +Eval (hcp-train-subset): [40] Total time: 0:00:15 (0.2448 s / it) +Averaged stats (hcp-train-subset): loss: 0.8338 (0.8348) +Eval (hcp-val): [40] [ 0/62] eta: 0:03:55 loss: 0.8355 (0.8355) time: 3.7962 data: 3.7112 max mem: 9377 +Eval (hcp-val): [40] [61/62] eta: 0:00:00 loss: 0.8356 (0.8381) time: 0.1309 data: 0.1058 max mem: 9377 +Eval (hcp-val): [40] Total time: 0:00:14 (0.2391 s / it) +Averaged stats (hcp-val): loss: 0.8356 (0.8381) +Eval (nsd-val): [40] [ 0/62] eta: 0:04:25 loss: 0.8002 (0.8002) time: 4.2780 data: 4.2137 max mem: 9377 +Eval (nsd-val): [40] [61/62] eta: 0:00:00 loss: 0.8101 (0.8129) time: 0.1231 data: 0.0960 max mem: 9377 +Eval (nsd-val): [40] Total time: 0:00:13 (0.2159 s / it) +Averaged stats (nsd-val): loss: 0.8101 (0.8129) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +Train: [41] [ 0/6250] eta: 8:14:17 lr: 0.000086 grad: 0.0872 (0.0872) loss: 0.8662 (0.8662) time: 4.7452 data: 4.4216 max mem: 9377 +Train: [41] [ 100/6250] eta: 0:20:07 lr: 0.000086 grad: 0.0879 (0.0923) loss: 0.8317 (0.8329) time: 0.1548 data: 0.0646 max mem: 9377 +Train: [41] [ 200/6250] eta: 0:17:21 lr: 0.000086 grad: 0.0821 (0.0900) loss: 0.8283 (0.8278) time: 0.1638 data: 0.0781 max mem: 9377 +Train: [41] [ 300/6250] eta: 0:16:09 lr: 0.000086 grad: 0.0823 (0.0891) loss: 0.8199 (0.8256) time: 0.1298 data: 0.0431 max mem: 9377 +Train: [41] [ 400/6250] eta: 0:15:20 lr: 0.000086 grad: 0.0813 (0.0892) loss: 0.8150 (0.8246) time: 0.1276 data: 0.0410 max mem: 9377 +Train: [41] [ 500/6250] eta: 0:14:51 lr: 0.000086 grad: 0.0829 (0.0880) loss: 0.8174 (0.8241) time: 0.1579 data: 0.0752 max mem: 9377 +Train: [41] [ 600/6250] eta: 0:14:17 lr: 0.000086 grad: 0.0823 (0.0872) loss: 0.8178 (0.8237) time: 0.1231 data: 0.0259 max mem: 9377 +Train: [41] [ 700/6250] eta: 0:13:57 lr: 0.000085 grad: 0.0810 (0.0870) loss: 0.8311 (0.8234) time: 0.1704 data: 0.0866 max mem: 9377 +Train: [41] [ 800/6250] eta: 0:13:34 lr: 0.000085 grad: 0.0762 (0.0864) loss: 0.8293 (0.8236) time: 0.1459 data: 0.0627 max mem: 9377 +Train: [41] [ 900/6250] eta: 0:13:17 lr: 0.000085 grad: 0.0733 (0.0855) loss: 0.8264 (0.8243) time: 0.1515 data: 0.0688 max mem: 9377 +Train: [41] [1000/6250] eta: 0:13:02 lr: 0.000085 grad: 0.0785 (0.0847) loss: 0.8277 (0.8247) time: 0.1333 data: 0.0487 max mem: 9377 +Train: [41] [1100/6250] eta: 0:12:45 lr: 0.000085 grad: 0.0735 (0.0843) loss: 0.8279 (0.8250) time: 0.1513 data: 0.0681 max mem: 9377 +Train: [41] [1200/6250] eta: 0:12:30 lr: 0.000085 grad: 0.0769 (0.0841) loss: 0.8233 (0.8252) time: 0.1446 data: 0.0620 max mem: 9377 +Train: [41] [1300/6250] eta: 0:12:16 lr: 0.000085 grad: 0.0765 (0.0839) loss: 0.8246 (0.8252) time: 0.1445 data: 0.0638 max mem: 9377 +Train: [41] [1400/6250] eta: 0:12:01 lr: 0.000085 grad: 0.0805 (0.0838) loss: 0.8161 (0.8250) time: 0.1505 data: 0.0749 max mem: 9377 +Train: [41] [1500/6250] eta: 0:11:43 lr: 0.000085 grad: 0.0819 (0.0837) loss: 0.8176 (0.8247) time: 0.1308 data: 0.0533 max mem: 9377 +Train: [41] [1600/6250] eta: 0:11:29 lr: 0.000085 grad: 0.0783 (0.0835) loss: 0.8252 (0.8245) time: 0.1402 data: 0.0626 max mem: 9377 +Train: [41] [1700/6250] eta: 0:11:12 lr: 0.000085 grad: 0.0820 (0.0837) loss: 0.8255 (0.8244) time: 0.1469 data: 0.0624 max mem: 9377 +Train: [41] [1800/6250] eta: 0:10:56 lr: 0.000085 grad: 0.0854 (0.0839) loss: 0.8193 (0.8241) time: 0.1379 data: 0.0583 max mem: 9377 +Train: [41] [1900/6250] eta: 0:10:40 lr: 0.000085 grad: 0.0867 (0.0840) loss: 0.8188 (0.8239) time: 0.1347 data: 0.0480 max mem: 9377 +Train: [41] [2000/6250] eta: 0:10:24 lr: 0.000085 grad: 0.0811 (0.0840) loss: 0.8249 (0.8237) time: 0.1378 data: 0.0542 max mem: 9377 +Train: [41] [2100/6250] eta: 0:10:08 lr: 0.000085 grad: 0.0846 (0.0840) loss: 0.8257 (0.8236) time: 0.1272 data: 0.0465 max mem: 9377 +Train: [41] [2200/6250] eta: 0:09:52 lr: 0.000085 grad: 0.0801 (0.0840) loss: 0.8237 (0.8237) time: 0.1308 data: 0.0496 max mem: 9377 +Train: [41] [2300/6250] eta: 0:09:37 lr: 0.000085 grad: 0.0754 (0.0839) loss: 0.8289 (0.8237) time: 0.1515 data: 0.0657 max mem: 9377 +Train: [41] [2400/6250] eta: 0:09:22 lr: 0.000085 grad: 0.0793 (0.0839) loss: 0.8300 (0.8238) time: 0.1335 data: 0.0510 max mem: 9377 +Train: [41] [2500/6250] eta: 0:09:08 lr: 0.000085 grad: 0.0840 (0.0839) loss: 0.8223 (0.8238) time: 0.1639 data: 0.0840 max mem: 9377 +Train: [41] [2600/6250] eta: 0:08:54 lr: 0.000085 grad: 0.0772 (0.0838) loss: 0.8268 (0.8239) time: 0.1718 data: 0.0961 max mem: 9377 +Train: [41] [2700/6250] eta: 0:08:40 lr: 0.000085 grad: 0.0771 (0.0837) loss: 0.8202 (0.8239) time: 0.1524 data: 0.0592 max mem: 9377 +Train: [41] [2800/6250] eta: 0:08:25 lr: 0.000085 grad: 0.0775 (0.0836) loss: 0.8257 (0.8239) time: 0.1486 data: 0.0675 max mem: 9377 +Train: [41] [2900/6250] eta: 0:08:12 lr: 0.000085 grad: 0.0843 (0.0836) loss: 0.8190 (0.8240) time: 0.1910 data: 0.1157 max mem: 9377 +Train: [41] [3000/6250] eta: 0:08:00 lr: 0.000085 grad: 0.0810 (0.0834) loss: 0.8225 (0.8240) time: 0.1734 data: 0.0858 max mem: 9377 +Train: [41] [3100/6250] eta: 0:07:46 lr: 0.000085 grad: 0.0752 (0.0833) loss: 0.8287 (0.8241) time: 0.1583 data: 0.0758 max mem: 9377 +Train: [41] [3200/6250] eta: 0:07:32 lr: 0.000085 grad: 0.0767 (0.0832) loss: 0.8253 (0.8242) time: 0.1736 data: 0.0961 max mem: 9377 +Train: [41] [3300/6250] eta: 0:07:18 lr: 0.000085 grad: 0.0818 (0.0831) loss: 0.8245 (0.8243) time: 0.1445 data: 0.0570 max mem: 9377 +Train: [41] [3400/6250] eta: 0:07:03 lr: 0.000085 grad: 0.0709 (0.0830) loss: 0.8325 (0.8244) time: 0.1516 data: 0.0681 max mem: 9377 +Train: [41] [3500/6250] eta: 0:06:49 lr: 0.000085 grad: 0.0805 (0.0829) loss: 0.8238 (0.8245) time: 0.1379 data: 0.0526 max mem: 9377 +Train: [41] [3600/6250] eta: 0:06:34 lr: 0.000085 grad: 0.0744 (0.0828) loss: 0.8317 (0.8246) time: 0.1315 data: 0.0432 max mem: 9377 +Train: [41] [3700/6250] eta: 0:06:19 lr: 0.000085 grad: 0.0807 (0.0827) loss: 0.8258 (0.8247) time: 0.1342 data: 0.0319 max mem: 9377 +Train: [41] [3800/6250] eta: 0:06:03 lr: 0.000085 grad: 0.0781 (0.0828) loss: 0.8282 (0.8248) time: 0.1444 data: 0.0568 max mem: 9377 +Train: [41] [3900/6250] eta: 0:05:48 lr: 0.000084 grad: 0.0772 (0.0827) loss: 0.8287 (0.8249) time: 0.1344 data: 0.0492 max mem: 9377 +Train: [41] [4000/6250] eta: 0:05:33 lr: 0.000084 grad: 0.0783 (0.0827) loss: 0.8304 (0.8249) time: 0.1677 data: 0.0895 max mem: 9377 +Train: [41] [4100/6250] eta: 0:05:19 lr: 0.000084 grad: 0.0803 (0.0827) loss: 0.8266 (0.8250) time: 0.1930 data: 0.1139 max mem: 9377 +Train: [41] [4200/6250] eta: 0:05:06 lr: 0.000084 grad: 0.0781 (0.0827) loss: 0.8272 (0.8249) time: 0.1979 data: 0.1240 max mem: 9377 +Train: [41] [4300/6250] eta: 0:04:52 lr: 0.000084 grad: 0.0822 (0.0828) loss: 0.8271 (0.8249) time: 0.1691 data: 0.0910 max mem: 9377 +Train: [41] [4400/6250] eta: 0:04:38 lr: 0.000084 grad: 0.0897 (0.0828) loss: 0.8173 (0.8249) time: 0.1785 data: 0.0995 max mem: 9377 +Train: [41] [4500/6250] eta: 0:04:24 lr: 0.000084 grad: 0.0829 (0.0829) loss: 0.8187 (0.8248) time: 0.1415 data: 0.0573 max mem: 9377 +Train: [41] [4600/6250] eta: 0:04:09 lr: 0.000084 grad: 0.0847 (0.0830) loss: 0.8256 (0.8247) time: 0.1608 data: 0.0830 max mem: 9377 +Train: [41] [4700/6250] eta: 0:03:54 lr: 0.000084 grad: 0.0783 (0.0831) loss: 0.8172 (0.8246) time: 0.1444 data: 0.0652 max mem: 9377 +Train: [41] [4800/6250] eta: 0:03:39 lr: 0.000084 grad: 0.0837 (0.0831) loss: 0.8190 (0.8246) time: 0.1610 data: 0.0782 max mem: 9377 +Train: [41] [4900/6250] eta: 0:03:24 lr: 0.000084 grad: 0.0830 (0.0832) loss: 0.8175 (0.8245) time: 0.1598 data: 0.0820 max mem: 9377 +Train: [41] [5000/6250] eta: 0:03:08 lr: 0.000084 grad: 0.0824 (0.0832) loss: 0.8184 (0.8244) time: 0.1442 data: 0.0581 max mem: 9377 +Train: [41] [5100/6250] eta: 0:02:53 lr: 0.000084 grad: 0.0802 (0.0832) loss: 0.8205 (0.8243) time: 0.0993 data: 0.0095 max mem: 9377 +Train: [41] [5200/6250] eta: 0:02:38 lr: 0.000084 grad: 0.0813 (0.0833) loss: 0.8251 (0.8243) time: 0.1430 data: 0.0571 max mem: 9377 +Train: [41] [5300/6250] eta: 0:02:22 lr: 0.000084 grad: 0.0836 (0.0833) loss: 0.8189 (0.8242) time: 0.1290 data: 0.0455 max mem: 9377 +Train: [41] [5400/6250] eta: 0:02:07 lr: 0.000084 grad: 0.0777 (0.0834) loss: 0.8203 (0.8241) time: 0.1469 data: 0.0689 max mem: 9377 +Train: [41] [5500/6250] eta: 0:01:52 lr: 0.000084 grad: 0.0845 (0.0834) loss: 0.8183 (0.8240) time: 0.1495 data: 0.0667 max mem: 9377 +Train: [41] [5600/6250] eta: 0:01:37 lr: 0.000084 grad: 0.0808 (0.0834) loss: 0.8297 (0.8240) time: 0.1406 data: 0.0596 max mem: 9377 +Train: [41] [5700/6250] eta: 0:01:22 lr: 0.000084 grad: 0.0863 (0.0834) loss: 0.8252 (0.8240) time: 0.1294 data: 0.0477 max mem: 9377 +Train: [41] [5800/6250] eta: 0:01:07 lr: 0.000084 grad: 0.0777 (0.0835) loss: 0.8269 (0.8240) time: 0.1466 data: 0.0723 max mem: 9377 +Train: [41] [5900/6250] eta: 0:00:52 lr: 0.000084 grad: 0.0815 (0.0835) loss: 0.8224 (0.8239) time: 0.1332 data: 0.0484 max mem: 9377 +Train: [41] [6000/6250] eta: 0:00:37 lr: 0.000084 grad: 0.0817 (0.0835) loss: 0.8228 (0.8239) time: 0.1277 data: 0.0501 max mem: 9377 +Train: [41] [6100/6250] eta: 0:00:22 lr: 0.000084 grad: 0.0781 (0.0835) loss: 0.8272 (0.8239) time: 0.1543 data: 0.0716 max mem: 9377 +Train: [41] [6200/6250] eta: 0:00:07 lr: 0.000084 grad: 0.0804 (0.0836) loss: 0.8280 (0.8239) time: 0.1288 data: 0.0478 max mem: 9377 +Train: [41] [6249/6250] eta: 0:00:00 lr: 0.000084 grad: 0.0816 (0.0836) loss: 0.8163 (0.8238) time: 0.1703 data: 0.0958 max mem: 9377 +Train: [41] Total time: 0:15:37 (0.1500 s / it) +Averaged stats: lr: 0.000084 grad: 0.0816 (0.0836) loss: 0.8163 (0.8238) +Eval (hcp-train-subset): [41] [ 0/62] eta: 0:04:59 loss: 0.8343 (0.8343) time: 4.8274 data: 4.7927 max mem: 9377 +Eval (hcp-train-subset): [41] [61/62] eta: 0:00:00 loss: 0.8339 (0.8346) time: 0.1515 data: 0.1241 max mem: 9377 +Eval (hcp-train-subset): [41] Total time: 0:00:14 (0.2408 s / it) +Averaged stats (hcp-train-subset): loss: 0.8339 (0.8346) +Eval (hcp-val): [41] [ 0/62] eta: 0:04:42 loss: 0.8318 (0.8318) time: 4.5618 data: 4.5316 max mem: 9377 +Eval (hcp-val): [41] [61/62] eta: 0:00:00 loss: 0.8357 (0.8373) time: 0.1347 data: 0.1094 max mem: 9377 +Eval (hcp-val): [41] Total time: 0:00:13 (0.2142 s / it) +Averaged stats (hcp-val): loss: 0.8357 (0.8373) +Eval (nsd-val): [41] [ 0/62] eta: 0:04:58 loss: 0.8009 (0.8009) time: 4.8139 data: 4.7838 max mem: 9377 +Eval (nsd-val): [41] [61/62] eta: 0:00:00 loss: 0.8098 (0.8119) time: 0.1179 data: 0.0925 max mem: 9377 +Eval (nsd-val): [41] Total time: 0:00:13 (0.2169 s / it) +Averaged stats (nsd-val): loss: 0.8098 (0.8119) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +Train: [42] [ 0/6250] eta: 10:33:25 lr: 0.000084 grad: nan (nan) loss: 0.8345 (0.8345) time: 6.0808 data: 5.9673 max mem: 9377 +Train: [42] [ 100/6250] eta: 0:20:43 lr: 0.000084 grad: 0.0719 (0.0883) loss: 0.8377 (0.8385) time: 0.1547 data: 0.0721 max mem: 9377 +Train: [42] [ 200/6250] eta: 0:17:32 lr: 0.000084 grad: 0.0824 (0.0858) loss: 0.8263 (0.8316) time: 0.1437 data: 0.0525 max mem: 9377 +Train: [42] [ 300/6250] eta: 0:16:40 lr: 0.000084 grad: 0.0790 (0.0857) loss: 0.8266 (0.8268) time: 0.1528 data: 0.0657 max mem: 9377 +Train: [42] [ 400/6250] eta: 0:16:02 lr: 0.000084 grad: 0.0746 (0.0848) loss: 0.8230 (0.8259) time: 0.1761 data: 0.0913 max mem: 9377 +Train: [42] [ 500/6250] eta: 0:15:14 lr: 0.000084 grad: 0.0757 (0.0833) loss: 0.8238 (0.8261) time: 0.1488 data: 0.0558 max mem: 9377 +Train: [42] [ 600/6250] eta: 0:14:45 lr: 0.000084 grad: 0.0774 (0.0826) loss: 0.8291 (0.8262) time: 0.1551 data: 0.0714 max mem: 9377 +Train: [42] [ 700/6250] eta: 0:14:21 lr: 0.000084 grad: 0.0759 (0.0821) loss: 0.8188 (0.8263) time: 0.1613 data: 0.0817 max mem: 9377 +Train: [42] [ 800/6250] eta: 0:14:20 lr: 0.000084 grad: 0.0760 (0.0819) loss: 0.8253 (0.8261) time: 0.1782 data: 0.0908 max mem: 9377 +Train: [42] [ 900/6250] eta: 0:14:13 lr: 0.000083 grad: 0.0777 (0.0821) loss: 0.8168 (0.8257) time: 0.1680 data: 0.0800 max mem: 9377 +Train: [42] [1000/6250] eta: 0:14:00 lr: 0.000083 grad: 0.0801 (0.0820) loss: 0.8223 (0.8250) time: 0.1625 data: 0.0820 max mem: 9377 +Train: [42] [1100/6250] eta: 0:13:41 lr: 0.000083 grad: 0.0801 (0.0820) loss: 0.8168 (0.8245) time: 0.1461 data: 0.0565 max mem: 9377 +Train: [42] [1200/6250] eta: 0:13:21 lr: 0.000083 grad: 0.0759 (0.0819) loss: 0.8247 (0.8242) time: 0.1781 data: 0.0988 max mem: 9377 +Train: [42] [1300/6250] eta: 0:12:57 lr: 0.000083 grad: 0.0813 (0.0824) loss: 0.8227 (0.8238) time: 0.1554 data: 0.0743 max mem: 9377 +Train: [42] [1400/6250] eta: 0:12:36 lr: 0.000083 grad: 0.0762 (0.0824) loss: 0.8243 (0.8235) time: 0.1476 data: 0.0692 max mem: 9377 +Train: [42] [1500/6250] eta: 0:12:19 lr: 0.000083 grad: 0.0812 (0.0823) loss: 0.8090 (0.8233) time: 0.1468 data: 0.0603 max mem: 9377 +Train: [42] [1600/6250] eta: 0:12:02 lr: 0.000083 grad: 0.0773 (0.0823) loss: 0.8216 (0.8231) time: 0.1476 data: 0.0646 max mem: 9377 +Train: [42] [1700/6250] eta: 0:11:48 lr: 0.000083 grad: 0.0796 (0.0822) loss: 0.8237 (0.8228) time: 0.1774 data: 0.0961 max mem: 9377 +Train: [42] [1800/6250] eta: 0:11:31 lr: 0.000083 grad: 0.0793 (0.0823) loss: 0.8216 (0.8227) time: 0.1334 data: 0.0518 max mem: 9377 +Train: [42] [1900/6250] eta: 0:11:15 lr: 0.000083 grad: 0.0796 (0.0823) loss: 0.8288 (0.8228) time: 0.1417 data: 0.0571 max mem: 9377 +Train: [42] [2000/6250] eta: 0:10:57 lr: 0.000083 grad: 0.0800 (0.0822) loss: 0.8276 (0.8228) time: 0.1322 data: 0.0426 max mem: 9377 +Train: [42] [2100/6250] eta: 0:10:42 lr: 0.000083 grad: 0.0786 (0.0821) loss: 0.8265 (0.8228) time: 0.1798 data: 0.1036 max mem: 9377 +Train: [42] [2200/6250] eta: 0:10:24 lr: 0.000083 grad: 0.0803 (0.0821) loss: 0.8190 (0.8228) time: 0.1651 data: 0.0867 max mem: 9377 +Train: [42] [2300/6250] eta: 0:10:06 lr: 0.000083 grad: 0.0762 (0.0822) loss: 0.8182 (0.8227) time: 0.1340 data: 0.0501 max mem: 9377 +Train: [42] [2400/6250] eta: 0:09:49 lr: 0.000083 grad: 0.0834 (0.0822) loss: 0.8242 (0.8227) time: 0.1480 data: 0.0654 max mem: 9377 +Train: [42] [2500/6250] eta: 0:09:37 lr: 0.000083 grad: 0.0807 (0.0823) loss: 0.8288 (0.8227) time: 0.1706 data: 0.0855 max mem: 9377 +Train: [42] [2600/6250] eta: 0:09:22 lr: 0.000083 grad: 0.0801 (0.0823) loss: 0.8179 (0.8226) time: 0.1443 data: 0.0666 max mem: 9377 +Train: [42] [2700/6250] eta: 0:09:07 lr: 0.000083 grad: 0.0819 (0.0825) loss: 0.8172 (0.8224) time: 0.1568 data: 0.0772 max mem: 9377 +Train: [42] [2800/6250] eta: 0:08:51 lr: 0.000083 grad: 0.0820 (0.0827) loss: 0.8234 (0.8223) time: 0.1431 data: 0.0574 max mem: 9377 +Train: [42] [2900/6250] eta: 0:08:36 lr: 0.000083 grad: 0.0846 (0.0828) loss: 0.8232 (0.8222) time: 0.1411 data: 0.0587 max mem: 9377 +Train: [42] [3000/6250] eta: 0:08:20 lr: 0.000083 grad: 0.0853 (0.0831) loss: 0.8170 (0.8220) time: 0.1605 data: 0.0778 max mem: 9377 +Train: [42] [3100/6250] eta: 0:08:04 lr: 0.000083 grad: 0.0776 (0.0831) loss: 0.8243 (0.8219) time: 0.1296 data: 0.0470 max mem: 9377 +Train: [42] [3200/6250] eta: 0:07:47 lr: 0.000083 grad: 0.0818 (0.0832) loss: 0.8178 (0.8219) time: 0.1460 data: 0.0630 max mem: 9377 +Train: [42] [3300/6250] eta: 0:07:31 lr: 0.000083 grad: 0.0838 (0.0833) loss: 0.8216 (0.8219) time: 0.1350 data: 0.0479 max mem: 9377 +Train: [42] [3400/6250] eta: 0:07:15 lr: 0.000083 grad: 0.0849 (0.0834) loss: 0.8164 (0.8218) time: 0.1353 data: 0.0506 max mem: 9377 +Train: [42] [3500/6250] eta: 0:06:58 lr: 0.000083 grad: 0.0864 (0.0834) loss: 0.8261 (0.8218) time: 0.1392 data: 0.0494 max mem: 9377 +Train: [42] [3600/6250] eta: 0:06:43 lr: 0.000083 grad: 0.0801 (0.0836) loss: 0.8214 (0.8218) time: 0.1613 data: 0.0825 max mem: 9377 +Train: [42] [3700/6250] eta: 0:06:27 lr: 0.000083 grad: 0.0805 (0.0837) loss: 0.8166 (0.8217) time: 0.1533 data: 0.0740 max mem: 9377 +Train: [42] [3800/6250] eta: 0:06:12 lr: 0.000083 grad: 0.0865 (0.0838) loss: 0.8218 (0.8217) time: 0.1581 data: 0.0790 max mem: 9377 +Train: [42] [3900/6250] eta: 0:05:56 lr: 0.000083 grad: 0.0796 (0.0839) loss: 0.8233 (0.8216) time: 0.1200 data: 0.0388 max mem: 9377 +Train: [42] [4000/6250] eta: 0:05:41 lr: 0.000083 grad: 0.0866 (0.0840) loss: 0.8211 (0.8215) time: 0.1743 data: 0.0961 max mem: 9377 +Train: [42] [4100/6250] eta: 0:05:27 lr: 0.000082 grad: 0.0881 (0.0841) loss: 0.8178 (0.8215) time: 0.1505 data: 0.0656 max mem: 9377 +Train: [42] [4200/6250] eta: 0:05:12 lr: 0.000082 grad: 0.0855 (0.0842) loss: 0.8192 (0.8214) time: 0.1690 data: 0.0901 max mem: 9377 +Train: [42] [4300/6250] eta: 0:04:56 lr: 0.000082 grad: 0.0766 (0.0842) loss: 0.8197 (0.8213) time: 0.1434 data: 0.0575 max mem: 9377 +Train: [42] [4400/6250] eta: 0:04:41 lr: 0.000082 grad: 0.0853 (0.0842) loss: 0.8153 (0.8213) time: 0.1352 data: 0.0528 max mem: 9377 +Train: [42] [4500/6250] eta: 0:04:25 lr: 0.000082 grad: 0.0782 (0.0843) loss: 0.8241 (0.8213) time: 0.1489 data: 0.0689 max mem: 9377 +Train: [42] [4600/6250] eta: 0:04:10 lr: 0.000082 grad: 0.0859 (0.0843) loss: 0.8143 (0.8212) time: 0.1382 data: 0.0494 max mem: 9377 +Train: [42] [4700/6250] eta: 0:03:54 lr: 0.000082 grad: 0.0841 (0.0843) loss: 0.8196 (0.8211) time: 0.1295 data: 0.0486 max mem: 9377 +Train: [42] [4800/6250] eta: 0:03:39 lr: 0.000082 grad: 0.0851 (0.0844) loss: 0.8233 (0.8211) time: 0.1686 data: 0.0895 max mem: 9377 +Train: [42] [4900/6250] eta: 0:03:23 lr: 0.000082 grad: 0.0837 (0.0845) loss: 0.8213 (0.8211) time: 0.1319 data: 0.0505 max mem: 9377 +Train: [42] [5000/6250] eta: 0:03:08 lr: 0.000082 grad: 0.0846 (0.0845) loss: 0.8220 (0.8210) time: 0.1342 data: 0.0518 max mem: 9377 +Train: [42] [5100/6250] eta: 0:02:53 lr: 0.000082 grad: 0.0815 (0.0846) loss: 0.8207 (0.8210) time: 0.1453 data: 0.0606 max mem: 9377 +Train: [42] [5200/6250] eta: 0:02:38 lr: 0.000082 grad: 0.0865 (0.0846) loss: 0.8186 (0.8209) time: 0.1593 data: 0.0816 max mem: 9377 +Train: [42] [5300/6250] eta: 0:02:23 lr: 0.000082 grad: 0.0850 (0.0847) loss: 0.8195 (0.8209) time: 0.1661 data: 0.0899 max mem: 9377 +Train: [42] [5400/6250] eta: 0:02:08 lr: 0.000082 grad: 0.0811 (0.0848) loss: 0.8219 (0.8209) time: 0.1633 data: 0.0748 max mem: 9377 +Train: [42] [5500/6250] eta: 0:01:53 lr: 0.000082 grad: 0.0881 (0.0848) loss: 0.8237 (0.8209) time: 0.1684 data: 0.0918 max mem: 9377 +Train: [42] [5600/6250] eta: 0:01:38 lr: 0.000082 grad: 0.0879 (0.0848) loss: 0.8148 (0.8209) time: 0.1814 data: 0.1066 max mem: 9377 +Train: [42] [5700/6250] eta: 0:01:23 lr: 0.000082 grad: 0.0846 (0.0849) loss: 0.8175 (0.8208) time: 0.1356 data: 0.0541 max mem: 9377 +Train: [42] [5800/6250] eta: 0:01:08 lr: 0.000082 grad: 0.0855 (0.0849) loss: 0.8195 (0.8208) time: 0.1583 data: 0.0745 max mem: 9377 +Train: [42] [5900/6250] eta: 0:00:53 lr: 0.000082 grad: 0.0860 (0.0851) loss: 0.8130 (0.8208) time: 0.1511 data: 0.0726 max mem: 9377 +Train: [42] [6000/6250] eta: 0:00:37 lr: 0.000082 grad: 0.0856 (0.0851) loss: 0.8172 (0.8207) time: 0.1438 data: 0.0647 max mem: 9377 +Train: [42] [6100/6250] eta: 0:00:22 lr: 0.000082 grad: 0.0859 (0.0852) loss: 0.8192 (0.8207) time: 0.1344 data: 0.0520 max mem: 9377 +Train: [42] [6200/6250] eta: 0:00:07 lr: 0.000082 grad: 0.0833 (0.0852) loss: 0.8184 (0.8207) time: 0.1731 data: 0.0979 max mem: 9377 +Train: [42] [6249/6250] eta: 0:00:00 lr: 0.000082 grad: 0.0844 (0.0852) loss: 0.8219 (0.8207) time: 0.1443 data: 0.0608 max mem: 9377 +Train: [42] Total time: 0:15:51 (0.1522 s / it) +Averaged stats: lr: 0.000082 grad: 0.0844 (0.0852) loss: 0.8219 (0.8207) +Eval (hcp-train-subset): [42] [ 0/62] eta: 0:05:01 loss: 0.8353 (0.8353) time: 4.8625 data: 4.8302 max mem: 9377 +Eval (hcp-train-subset): [42] [61/62] eta: 0:00:00 loss: 0.8336 (0.8339) time: 0.1245 data: 0.0992 max mem: 9377 +Eval (hcp-train-subset): [42] Total time: 0:00:14 (0.2372 s / it) +Averaged stats (hcp-train-subset): loss: 0.8336 (0.8339) +Eval (hcp-val): [42] [ 0/62] eta: 0:03:31 loss: 0.8333 (0.8333) time: 3.4158 data: 3.3197 max mem: 9377 +Eval (hcp-val): [42] [61/62] eta: 0:00:00 loss: 0.8358 (0.8370) time: 0.1283 data: 0.1032 max mem: 9377 +Eval (hcp-val): [42] Total time: 0:00:13 (0.2120 s / it) +Averaged stats (hcp-val): loss: 0.8358 (0.8370) +Eval (nsd-val): [42] [ 0/62] eta: 0:03:30 loss: 0.7998 (0.7998) time: 3.4020 data: 3.3311 max mem: 9377 +Eval (nsd-val): [42] [61/62] eta: 0:00:00 loss: 0.8116 (0.8113) time: 0.1324 data: 0.1068 max mem: 9377 +Eval (nsd-val): [42] Total time: 0:00:13 (0.2097 s / it) +Averaged stats (nsd-val): loss: 0.8116 (0.8113) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +Train: [43] [ 0/6250] eta: 8:40:46 lr: 0.000082 grad: 0.1041 (0.1041) loss: 0.8697 (0.8697) time: 4.9995 data: 4.8727 max mem: 9377 +Train: [43] [ 100/6250] eta: 0:19:07 lr: 0.000082 grad: 0.0716 (0.0875) loss: 0.8351 (0.8389) time: 0.1313 data: 0.0356 max mem: 9377 +Train: [43] [ 200/6250] eta: 0:16:55 lr: 0.000082 grad: 0.0726 (0.0813) loss: 0.8400 (0.8381) time: 0.1493 data: 0.0739 max mem: 9377 +Train: [43] [ 300/6250] eta: 0:15:48 lr: 0.000082 grad: 0.0805 (0.0824) loss: 0.8309 (0.8357) time: 0.1413 data: 0.0549 max mem: 9377 +Train: [43] [ 400/6250] eta: 0:14:56 lr: 0.000082 grad: 0.0785 (0.0820) loss: 0.8238 (0.8342) time: 0.1286 data: 0.0437 max mem: 9377 +Train: [43] [ 500/6250] eta: 0:14:23 lr: 0.000082 grad: 0.0806 (0.0822) loss: 0.8287 (0.8328) time: 0.1451 data: 0.0545 max mem: 9377 +Train: [43] [ 600/6250] eta: 0:13:50 lr: 0.000082 grad: 0.0768 (0.0818) loss: 0.8218 (0.8323) time: 0.1140 data: 0.0230 max mem: 9377 +Train: [43] [ 700/6250] eta: 0:13:26 lr: 0.000082 grad: 0.0764 (0.0810) loss: 0.8327 (0.8321) time: 0.1011 data: 0.0077 max mem: 9377 +Train: [43] [ 800/6250] eta: 0:13:08 lr: 0.000082 grad: 0.0810 (0.0808) loss: 0.8249 (0.8318) time: 0.1429 data: 0.0544 max mem: 9377 +Train: [43] [ 900/6250] eta: 0:12:52 lr: 0.000082 grad: 0.0788 (0.0807) loss: 0.8231 (0.8313) time: 0.1367 data: 0.0527 max mem: 9377 +Train: [43] [1000/6250] eta: 0:12:37 lr: 0.000081 grad: 0.0784 (0.0805) loss: 0.8273 (0.8307) time: 0.1523 data: 0.0733 max mem: 9377 +Train: [43] [1100/6250] eta: 0:12:22 lr: 0.000081 grad: 0.0776 (0.0807) loss: 0.8261 (0.8302) time: 0.1724 data: 0.0909 max mem: 9377 +Train: [43] [1200/6250] eta: 0:12:03 lr: 0.000081 grad: 0.0774 (0.0807) loss: 0.8270 (0.8298) time: 0.1562 data: 0.0742 max mem: 9377 +Train: [43] [1300/6250] eta: 0:11:47 lr: 0.000081 grad: 0.0790 (0.0807) loss: 0.8193 (0.8293) time: 0.1127 data: 0.0252 max mem: 9377 +Train: [43] [1400/6250] eta: 0:11:32 lr: 0.000081 grad: 0.0746 (0.0807) loss: 0.8231 (0.8290) time: 0.1384 data: 0.0630 max mem: 9377 +Train: [43] [1500/6250] eta: 0:11:20 lr: 0.000081 grad: 0.0802 (0.0808) loss: 0.8253 (0.8286) time: 0.1318 data: 0.0513 max mem: 9377 +Train: [43] [1600/6250] eta: 0:11:08 lr: 0.000081 grad: 0.0818 (0.0808) loss: 0.8210 (0.8283) time: 0.1411 data: 0.0602 max mem: 9377 +Train: [43] [1700/6250] eta: 0:10:54 lr: 0.000081 grad: 0.0813 (0.0808) loss: 0.8242 (0.8280) time: 0.1357 data: 0.0471 max mem: 9377 +Train: [43] [1800/6250] eta: 0:10:40 lr: 0.000081 grad: 0.0841 (0.0812) loss: 0.8184 (0.8278) time: 0.1347 data: 0.0539 max mem: 9377 +Train: [43] [1900/6250] eta: 0:10:27 lr: 0.000081 grad: 0.0794 (0.0812) loss: 0.8246 (0.8276) time: 0.1749 data: 0.0970 max mem: 9377 +Train: [43] [2000/6250] eta: 0:10:11 lr: 0.000081 grad: 0.0815 (0.0814) loss: 0.8245 (0.8274) time: 0.1351 data: 0.0537 max mem: 9377 +Train: [43] [2100/6250] eta: 0:09:57 lr: 0.000081 grad: 0.0853 (0.0816) loss: 0.8223 (0.8271) time: 0.1814 data: 0.0926 max mem: 9377 +Train: [43] [2200/6250] eta: 0:09:51 lr: 0.000081 grad: 0.0775 (0.0816) loss: 0.8225 (0.8268) time: 0.1781 data: 0.0993 max mem: 9377 +Train: [43] [2300/6250] eta: 0:09:41 lr: 0.000081 grad: 0.0845 (0.0817) loss: 0.8266 (0.8267) time: 0.1661 data: 0.0927 max mem: 9377 +Train: [43] [2400/6250] eta: 0:09:29 lr: 0.000081 grad: 0.0811 (0.0818) loss: 0.8192 (0.8266) time: 0.1756 data: 0.0966 max mem: 9377 +Train: [43] [2500/6250] eta: 0:09:14 lr: 0.000081 grad: 0.0833 (0.0820) loss: 0.8191 (0.8264) time: 0.1708 data: 0.0862 max mem: 9377 +Train: [43] [2600/6250] eta: 0:09:03 lr: 0.000081 grad: 0.0869 (0.0823) loss: 0.8221 (0.8262) time: 0.1728 data: 0.0903 max mem: 9377 +Train: [43] [2700/6250] eta: 0:08:50 lr: 0.000081 grad: 0.0833 (0.0825) loss: 0.8225 (0.8260) time: 0.1768 data: 0.0916 max mem: 9377 +Train: [43] [2800/6250] eta: 0:08:36 lr: 0.000081 grad: 0.0788 (0.0826) loss: 0.8234 (0.8259) time: 0.1467 data: 0.0642 max mem: 9377 +Train: [43] [2900/6250] eta: 0:08:22 lr: 0.000081 grad: 0.0813 (0.0827) loss: 0.8272 (0.8258) time: 0.1530 data: 0.0684 max mem: 9377 +Train: [43] [3000/6250] eta: 0:08:07 lr: 0.000081 grad: 0.0814 (0.0829) loss: 0.8262 (0.8256) time: 0.1367 data: 0.0549 max mem: 9377 +Train: [43] [3100/6250] eta: 0:07:51 lr: 0.000081 grad: 0.0800 (0.0830) loss: 0.8290 (0.8255) time: 0.1300 data: 0.0416 max mem: 9377 +Train: [43] [3200/6250] eta: 0:07:35 lr: 0.000081 grad: 0.0829 (0.0830) loss: 0.8244 (0.8254) time: 0.1382 data: 0.0597 max mem: 9377 +Train: [43] [3300/6250] eta: 0:07:20 lr: 0.000081 grad: 0.0828 (0.0831) loss: 0.8256 (0.8254) time: 0.1607 data: 0.0817 max mem: 9377 +Train: [43] [3400/6250] eta: 0:07:05 lr: 0.000081 grad: 0.0802 (0.0832) loss: 0.8185 (0.8253) time: 0.1393 data: 0.0554 max mem: 9377 +Train: [43] [3500/6250] eta: 0:06:50 lr: 0.000081 grad: 0.0784 (0.0833) loss: 0.8216 (0.8252) time: 0.1202 data: 0.0386 max mem: 9377 +Train: [43] [3600/6250] eta: 0:06:35 lr: 0.000081 grad: 0.0830 (0.0834) loss: 0.8256 (0.8252) time: 0.1323 data: 0.0494 max mem: 9377 +Train: [43] [3700/6250] eta: 0:06:19 lr: 0.000081 grad: 0.0882 (0.0835) loss: 0.8221 (0.8251) time: 0.1367 data: 0.0605 max mem: 9377 +Train: [43] [3800/6250] eta: 0:06:06 lr: 0.000081 grad: 0.0865 (0.0836) loss: 0.8237 (0.8251) time: 0.2140 data: 0.1277 max mem: 9377 +Train: [43] [3900/6250] eta: 0:05:51 lr: 0.000081 grad: 0.0869 (0.0838) loss: 0.8240 (0.8250) time: 0.1429 data: 0.0547 max mem: 9377 +Train: [43] [4000/6250] eta: 0:05:36 lr: 0.000081 grad: 0.0876 (0.0840) loss: 0.8200 (0.8248) time: 0.1745 data: 0.0914 max mem: 9377 +Train: [43] [4100/6250] eta: 0:05:22 lr: 0.000081 grad: 0.0855 (0.0841) loss: 0.8207 (0.8246) time: 0.1852 data: 0.0994 max mem: 9377 +Train: [43] [4200/6250] eta: 0:05:07 lr: 0.000080 grad: 0.0871 (0.0842) loss: 0.8159 (0.8245) time: 0.2024 data: 0.1210 max mem: 9377 +Train: [43] [4300/6250] eta: 0:04:52 lr: 0.000080 grad: 0.0891 (0.0844) loss: 0.8205 (0.8244) time: 0.1694 data: 0.0879 max mem: 9377 +Train: [43] [4400/6250] eta: 0:04:36 lr: 0.000080 grad: 0.0877 (0.0845) loss: 0.8270 (0.8243) time: 0.1319 data: 0.0432 max mem: 9377 +Train: [43] [4500/6250] eta: 0:04:21 lr: 0.000080 grad: 0.0878 (0.0846) loss: 0.8174 (0.8242) time: 0.1384 data: 0.0555 max mem: 9377 +Train: [43] [4600/6250] eta: 0:04:06 lr: 0.000080 grad: 0.0846 (0.0848) loss: 0.8201 (0.8240) time: 0.1567 data: 0.0772 max mem: 9377 +Train: [43] [4700/6250] eta: 0:03:51 lr: 0.000080 grad: 0.0881 (0.0850) loss: 0.8165 (0.8239) time: 0.1302 data: 0.0434 max mem: 9377 +Train: [43] [4800/6250] eta: 0:03:36 lr: 0.000080 grad: 0.0888 (0.0851) loss: 0.8192 (0.8238) time: 0.1536 data: 0.0651 max mem: 9377 +Train: [43] [4900/6250] eta: 0:03:21 lr: 0.000080 grad: 0.0935 (0.0852) loss: 0.8182 (0.8236) time: 0.1431 data: 0.0608 max mem: 9377 +Train: [43] [5000/6250] eta: 0:03:05 lr: 0.000080 grad: 0.0853 (0.0852) loss: 0.8191 (0.8235) time: 0.1311 data: 0.0480 max mem: 9377 +Train: [43] [5100/6250] eta: 0:02:51 lr: 0.000080 grad: 0.0870 (0.0853) loss: 0.8201 (0.8234) time: 0.1724 data: 0.0880 max mem: 9377 +Train: [43] [5200/6250] eta: 0:02:36 lr: 0.000080 grad: 0.0848 (0.0853) loss: 0.8195 (0.8234) time: 0.1425 data: 0.0675 max mem: 9377 +Train: [43] [5300/6250] eta: 0:02:21 lr: 0.000080 grad: 0.0881 (0.0854) loss: 0.8148 (0.8233) time: 0.1517 data: 0.0711 max mem: 9377 +Train: [43] [5400/6250] eta: 0:02:06 lr: 0.000080 grad: 0.0853 (0.0855) loss: 0.8122 (0.8232) time: 0.1600 data: 0.0820 max mem: 9377 +Train: [43] [5500/6250] eta: 0:01:51 lr: 0.000080 grad: 0.0916 (0.0855) loss: 0.8139 (0.8231) time: 0.1528 data: 0.0761 max mem: 9377 +Train: [43] [5600/6250] eta: 0:01:36 lr: 0.000080 grad: 0.0876 (0.0856) loss: 0.8151 (0.8230) time: 0.1469 data: 0.0662 max mem: 9377 +Train: [43] [5700/6250] eta: 0:01:22 lr: 0.000080 grad: 0.0817 (0.0858) loss: 0.8167 (0.8229) time: 0.1595 data: 0.0830 max mem: 9377 +Train: [43] [5800/6250] eta: 0:01:07 lr: 0.000080 grad: 0.0931 (0.0859) loss: 0.8158 (0.8227) time: 0.1446 data: 0.0596 max mem: 9377 +Train: [43] [5900/6250] eta: 0:00:52 lr: 0.000080 grad: 0.0892 (0.0859) loss: 0.8182 (0.8227) time: 0.1367 data: 0.0554 max mem: 9377 +Train: [43] [6000/6250] eta: 0:00:37 lr: 0.000080 grad: 0.0869 (0.0860) loss: 0.8218 (0.8226) time: 0.1505 data: 0.0726 max mem: 9377 +Train: [43] [6100/6250] eta: 0:00:22 lr: 0.000080 grad: 0.0793 (0.0860) loss: 0.8216 (0.8225) time: 0.1775 data: 0.0962 max mem: 9377 +Train: [43] [6200/6250] eta: 0:00:07 lr: 0.000080 grad: 0.0844 (0.0861) loss: 0.8174 (0.8225) time: 0.1469 data: 0.0613 max mem: 9377 +Train: [43] [6249/6250] eta: 0:00:00 lr: 0.000080 grad: 0.0873 (0.0861) loss: 0.8156 (0.8225) time: 0.1770 data: 0.0969 max mem: 9377 +Train: [43] Total time: 0:15:38 (0.1502 s / it) +Averaged stats: lr: 0.000080 grad: 0.0873 (0.0861) loss: 0.8156 (0.8225) +Eval (hcp-train-subset): [43] [ 0/62] eta: 0:04:32 loss: 0.8319 (0.8319) time: 4.3928 data: 4.2784 max mem: 9377 +Eval (hcp-train-subset): [43] [61/62] eta: 0:00:00 loss: 0.8345 (0.8341) time: 0.1179 data: 0.0905 max mem: 9377 +Eval (hcp-train-subset): [43] Total time: 0:00:14 (0.2314 s / it) +Averaged stats (hcp-train-subset): loss: 0.8345 (0.8341) +Eval (hcp-val): [43] [ 0/62] eta: 0:05:42 loss: 0.8329 (0.8329) time: 5.5245 data: 5.4949 max mem: 9377 +Eval (hcp-val): [43] [61/62] eta: 0:00:00 loss: 0.8355 (0.8375) time: 0.1528 data: 0.1271 max mem: 9377 +Eval (hcp-val): [43] Total time: 0:00:13 (0.2242 s / it) +Averaged stats (hcp-val): loss: 0.8355 (0.8375) +Eval (nsd-val): [43] [ 0/62] eta: 0:05:32 loss: 0.7977 (0.7977) time: 5.3682 data: 5.3371 max mem: 9377 +Eval (nsd-val): [43] [61/62] eta: 0:00:00 loss: 0.8094 (0.8090) time: 0.1442 data: 0.1157 max mem: 9377 +Eval (nsd-val): [43] Total time: 0:00:14 (0.2314 s / it) +Averaged stats (nsd-val): loss: 0.8094 (0.8090) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +Train: [44] [ 0/6250] eta: 10:17:24 lr: 0.000080 grad: 0.0972 (0.0972) loss: 0.8774 (0.8774) time: 5.9272 data: 5.8015 max mem: 9377 +Train: [44] [ 100/6250] eta: 0:22:09 lr: 0.000080 grad: 0.0778 (0.0976) loss: 0.8406 (0.8428) time: 0.1782 data: 0.0739 max mem: 9377 +Train: [44] [ 200/6250] eta: 0:19:42 lr: 0.000080 grad: 0.0814 (0.0919) loss: 0.8361 (0.8380) time: 0.2117 data: 0.0545 max mem: 9377 +Train: [44] [ 300/6250] eta: 0:18:27 lr: 0.000080 grad: 0.0801 (0.0889) loss: 0.8279 (0.8348) time: 0.1524 data: 0.0647 max mem: 9377 +Train: [44] [ 400/6250] eta: 0:17:16 lr: 0.000080 grad: 0.0835 (0.0876) loss: 0.8243 (0.8327) time: 0.1284 data: 0.0305 max mem: 9377 +Train: [44] [ 500/6250] eta: 0:16:20 lr: 0.000080 grad: 0.0791 (0.0873) loss: 0.8200 (0.8304) time: 0.1454 data: 0.0607 max mem: 9377 +Train: [44] [ 600/6250] eta: 0:15:57 lr: 0.000080 grad: 0.0829 (0.0870) loss: 0.8190 (0.8287) time: 0.2010 data: 0.1164 max mem: 9377 +Train: [44] [ 700/6250] eta: 0:15:32 lr: 0.000080 grad: 0.0835 (0.0867) loss: 0.8199 (0.8275) time: 0.1532 data: 0.0645 max mem: 9377 +Train: [44] [ 800/6250] eta: 0:15:21 lr: 0.000080 grad: 0.0802 (0.0867) loss: 0.8193 (0.8265) time: 0.1853 data: 0.0984 max mem: 9377 +Train: [44] [ 900/6250] eta: 0:15:16 lr: 0.000080 grad: 0.0809 (0.0864) loss: 0.8210 (0.8258) time: 0.2022 data: 0.1223 max mem: 9377 +Train: [44] [1000/6250] eta: 0:14:49 lr: 0.000080 grad: 0.0798 (0.0863) loss: 0.8252 (0.8253) time: 0.1292 data: 0.0390 max mem: 9377 +Train: [44] [1100/6250] eta: 0:14:20 lr: 0.000079 grad: 0.0837 (0.0864) loss: 0.8164 (0.8247) time: 0.1288 data: 0.0360 max mem: 9377 +Train: [44] [1200/6250] eta: 0:13:53 lr: 0.000079 grad: 0.0818 (0.0865) loss: 0.8182 (0.8242) time: 0.1130 data: 0.0226 max mem: 9377 +Train: [44] [1300/6250] eta: 0:13:30 lr: 0.000079 grad: 0.0865 (0.0864) loss: 0.8172 (0.8239) time: 0.1597 data: 0.0728 max mem: 9377 +Train: [44] [1400/6250] eta: 0:13:11 lr: 0.000079 grad: 0.0854 (0.0864) loss: 0.8136 (0.8235) time: 0.1807 data: 0.0960 max mem: 9377 +Train: [44] [1500/6250] eta: 0:12:49 lr: 0.000079 grad: 0.0804 (0.0863) loss: 0.8267 (0.8235) time: 0.1500 data: 0.0574 max mem: 9377 +Train: [44] [1600/6250] eta: 0:12:30 lr: 0.000079 grad: 0.0925 (0.0864) loss: 0.8167 (0.8231) time: 0.1657 data: 0.0864 max mem: 9377 +Train: [44] [1700/6250] eta: 0:12:09 lr: 0.000079 grad: 0.0854 (0.0865) loss: 0.8189 (0.8229) time: 0.1442 data: 0.0567 max mem: 9377 +Train: [44] [1800/6250] eta: 0:11:48 lr: 0.000079 grad: 0.0855 (0.0866) loss: 0.8189 (0.8228) time: 0.1181 data: 0.0300 max mem: 9377 +Train: [44] [1900/6250] eta: 0:11:29 lr: 0.000079 grad: 0.0931 (0.0868) loss: 0.8216 (0.8227) time: 0.1411 data: 0.0591 max mem: 9377 +Train: [44] [2000/6250] eta: 0:11:18 lr: 0.000079 grad: 0.0770 (0.0866) loss: 0.8285 (0.8227) time: 0.1705 data: 0.0869 max mem: 9377 +Train: [44] [2100/6250] eta: 0:11:03 lr: 0.000079 grad: 0.0819 (0.0866) loss: 0.8225 (0.8227) time: 0.1816 data: 0.1022 max mem: 9377 +Train: [44] [2200/6250] eta: 0:10:47 lr: 0.000079 grad: 0.0862 (0.0868) loss: 0.8189 (0.8226) time: 0.1554 data: 0.0756 max mem: 9377 +Train: [44] [2300/6250] eta: 0:10:30 lr: 0.000079 grad: 0.0938 (0.0870) loss: 0.8226 (0.8226) time: 0.1614 data: 0.0755 max mem: 9377 +Train: [44] [2400/6250] eta: 0:10:11 lr: 0.000079 grad: 0.0812 (0.0870) loss: 0.8217 (0.8225) time: 0.1470 data: 0.0642 max mem: 9377 +Train: [44] [2500/6250] eta: 0:09:52 lr: 0.000079 grad: 0.0842 (0.0869) loss: 0.8263 (0.8225) time: 0.1296 data: 0.0427 max mem: 9377 +Train: [44] [2600/6250] eta: 0:09:35 lr: 0.000079 grad: 0.0812 (0.0869) loss: 0.8280 (0.8224) time: 0.1471 data: 0.0627 max mem: 9377 +Train: [44] [2700/6250] eta: 0:09:17 lr: 0.000079 grad: 0.0903 (0.0869) loss: 0.8194 (0.8224) time: 0.1455 data: 0.0576 max mem: 9377 +Train: [44] [2800/6250] eta: 0:08:59 lr: 0.000079 grad: 0.0832 (0.0868) loss: 0.8250 (0.8225) time: 0.1255 data: 0.0330 max mem: 9377 +Train: [44] [2900/6250] eta: 0:08:42 lr: 0.000079 grad: 0.0855 (0.0869) loss: 0.8200 (0.8224) time: 0.1604 data: 0.0740 max mem: 9377 +Train: [44] [3000/6250] eta: 0:08:25 lr: 0.000079 grad: 0.0877 (0.0869) loss: 0.8198 (0.8224) time: 0.1328 data: 0.0510 max mem: 9377 +Train: [44] [3100/6250] eta: 0:08:07 lr: 0.000079 grad: 0.0794 (0.0868) loss: 0.8263 (0.8224) time: 0.1340 data: 0.0557 max mem: 9377 +Train: [44] [3200/6250] eta: 0:07:51 lr: 0.000079 grad: 0.0810 (0.0868) loss: 0.8175 (0.8225) time: 0.1322 data: 0.0496 max mem: 9377 +Train: [44] [3300/6250] eta: 0:07:34 lr: 0.000079 grad: 0.0832 (0.0868) loss: 0.8241 (0.8225) time: 0.1306 data: 0.0564 max mem: 9377 +Train: [44] [3400/6250] eta: 0:07:18 lr: 0.000079 grad: 0.0852 (0.0868) loss: 0.8255 (0.8226) time: 0.1320 data: 0.0450 max mem: 9377 +Train: [44] [3500/6250] eta: 0:07:01 lr: 0.000079 grad: 0.0788 (0.0867) loss: 0.8253 (0.8226) time: 0.1433 data: 0.0605 max mem: 9377 +Train: [44] [3600/6250] eta: 0:06:46 lr: 0.000079 grad: 0.0779 (0.0866) loss: 0.8301 (0.8227) time: 0.1832 data: 0.1071 max mem: 9377 +Train: [44] [3700/6250] eta: 0:06:32 lr: 0.000079 grad: 0.0848 (0.0865) loss: 0.8239 (0.8227) time: 0.1509 data: 0.0702 max mem: 9377 +Train: [44] [3800/6250] eta: 0:06:16 lr: 0.000079 grad: 0.0820 (0.0865) loss: 0.8247 (0.8228) time: 0.1568 data: 0.0770 max mem: 9377 +Train: [44] [3900/6250] eta: 0:06:01 lr: 0.000079 grad: 0.0883 (0.0865) loss: 0.8221 (0.8228) time: 0.1339 data: 0.0588 max mem: 9377 +Train: [44] [4000/6250] eta: 0:05:46 lr: 0.000079 grad: 0.0852 (0.0866) loss: 0.8183 (0.8228) time: 0.1365 data: 0.0493 max mem: 9377 +Train: [44] [4100/6250] eta: 0:05:30 lr: 0.000079 grad: 0.0868 (0.0867) loss: 0.8152 (0.8227) time: 0.1525 data: 0.0699 max mem: 9377 +Train: [44] [4200/6250] eta: 0:05:15 lr: 0.000078 grad: 0.0885 (0.0867) loss: 0.8191 (0.8226) time: 0.1345 data: 0.0537 max mem: 9377 +Train: [44] [4300/6250] eta: 0:04:59 lr: 0.000078 grad: 0.0833 (0.0868) loss: 0.8176 (0.8225) time: 0.1450 data: 0.0589 max mem: 9377 +Train: [44] [4400/6250] eta: 0:04:43 lr: 0.000078 grad: 0.0828 (0.0869) loss: 0.8192 (0.8225) time: 0.1213 data: 0.0318 max mem: 9377 +Train: [44] [4500/6250] eta: 0:04:27 lr: 0.000078 grad: 0.0826 (0.0870) loss: 0.8226 (0.8224) time: 0.1224 data: 0.0360 max mem: 9377 +Train: [44] [4600/6250] eta: 0:04:12 lr: 0.000078 grad: 0.0842 (0.0871) loss: 0.8226 (0.8224) time: 0.1414 data: 0.0398 max mem: 9377 +Train: [44] [4700/6250] eta: 0:03:56 lr: 0.000078 grad: 0.0823 (0.0870) loss: 0.8278 (0.8224) time: 0.1448 data: 0.0591 max mem: 9377 +Train: [44] [4800/6250] eta: 0:03:40 lr: 0.000078 grad: 0.0815 (0.0870) loss: 0.8188 (0.8224) time: 0.1369 data: 0.0447 max mem: 9377 +Train: [44] [4900/6250] eta: 0:03:25 lr: 0.000078 grad: 0.0837 (0.0870) loss: 0.8254 (0.8224) time: 0.1405 data: 0.0565 max mem: 9377 +Train: [44] [5000/6250] eta: 0:03:09 lr: 0.000078 grad: 0.0858 (0.0869) loss: 0.8208 (0.8224) time: 0.1451 data: 0.0591 max mem: 9377 +Train: [44] [5100/6250] eta: 0:02:54 lr: 0.000078 grad: 0.0884 (0.0869) loss: 0.8253 (0.8225) time: 0.1371 data: 0.0525 max mem: 9377 +Train: [44] [5200/6250] eta: 0:02:39 lr: 0.000078 grad: 0.0838 (0.0868) loss: 0.8187 (0.8224) time: 0.1507 data: 0.0643 max mem: 9377 +Train: [44] [5300/6250] eta: 0:02:23 lr: 0.000078 grad: 0.0836 (0.0868) loss: 0.8239 (0.8224) time: 0.1486 data: 0.0669 max mem: 9377 +Train: [44] [5400/6250] eta: 0:02:08 lr: 0.000078 grad: 0.0819 (0.0868) loss: 0.8231 (0.8224) time: 0.1394 data: 0.0599 max mem: 9377 +Train: [44] [5500/6250] eta: 0:01:53 lr: 0.000078 grad: 0.0786 (0.0869) loss: 0.8202 (0.8223) time: 0.1321 data: 0.0500 max mem: 9377 +Train: [44] [5600/6250] eta: 0:01:37 lr: 0.000078 grad: 0.0837 (0.0869) loss: 0.8114 (0.8222) time: 0.1391 data: 0.0550 max mem: 9377 +Train: [44] [5700/6250] eta: 0:01:22 lr: 0.000078 grad: 0.0905 (0.0870) loss: 0.8163 (0.8221) time: 0.1429 data: 0.0678 max mem: 9377 +Train: [44] [5800/6250] eta: 0:01:07 lr: 0.000078 grad: 0.0916 (0.0870) loss: 0.8210 (0.8220) time: 0.1624 data: 0.0883 max mem: 9377 +Train: [44] [5900/6250] eta: 0:00:52 lr: 0.000078 grad: 0.0822 (0.0871) loss: 0.8227 (0.8220) time: 0.1705 data: 0.0872 max mem: 9377 +Train: [44] [6000/6250] eta: 0:00:37 lr: 0.000078 grad: 0.0893 (0.0871) loss: 0.8171 (0.8219) time: 0.1535 data: 0.0714 max mem: 9377 +Train: [44] [6100/6250] eta: 0:00:22 lr: 0.000078 grad: 0.0901 (0.0872) loss: 0.8189 (0.8219) time: 0.1657 data: 0.0862 max mem: 9377 +Train: [44] [6200/6250] eta: 0:00:07 lr: 0.000078 grad: 0.0843 (0.0872) loss: 0.8129 (0.8218) time: 0.1478 data: 0.0632 max mem: 9377 +Train: [44] [6249/6250] eta: 0:00:00 lr: 0.000078 grad: 0.0840 (0.0872) loss: 0.8112 (0.8217) time: 0.1414 data: 0.0574 max mem: 9377 +Train: [44] Total time: 0:15:47 (0.1516 s / it) +Averaged stats: lr: 0.000078 grad: 0.0840 (0.0872) loss: 0.8112 (0.8217) +Eval (hcp-train-subset): [44] [ 0/62] eta: 0:05:25 loss: 0.8345 (0.8345) time: 5.2509 data: 5.2172 max mem: 9377 +Eval (hcp-train-subset): [44] [61/62] eta: 0:00:00 loss: 0.8350 (0.8330) time: 0.1219 data: 0.0944 max mem: 9377 +Eval (hcp-train-subset): [44] Total time: 0:00:14 (0.2319 s / it) +Averaged stats (hcp-train-subset): loss: 0.8350 (0.8330) +Making plots (hcp-train-subset): example=56 +Eval (hcp-val): [44] [ 0/62] eta: 0:03:59 loss: 0.8372 (0.8372) time: 3.8628 data: 3.7832 max mem: 9377 +Eval (hcp-val): [44] [61/62] eta: 0:00:00 loss: 0.8367 (0.8366) time: 0.1337 data: 0.1084 max mem: 9377 +Eval (hcp-val): [44] Total time: 0:00:13 (0.2139 s / it) +Averaged stats (hcp-val): loss: 0.8367 (0.8366) +Making plots (hcp-val): example=17 +Eval (nsd-val): [44] [ 0/62] eta: 0:03:33 loss: 0.8077 (0.8077) time: 3.4432 data: 3.3650 max mem: 9377 +Eval (nsd-val): [44] [61/62] eta: 0:00:00 loss: 0.8195 (0.8194) time: 0.1350 data: 0.1084 max mem: 9377 +Eval (nsd-val): [44] Total time: 0:00:13 (0.2164 s / it) +Averaged stats (nsd-val): loss: 0.8195 (0.8194) +Making plots (nsd-val): example=60 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-00044.pth +Train: [45] [ 0/6250] eta: 7:54:16 lr: 0.000078 grad: 0.0615 (0.0615) loss: 0.8535 (0.8535) time: 4.5530 data: 4.3655 max mem: 9377 +Train: [45] [ 100/6250] eta: 0:20:10 lr: 0.000078 grad: 0.0806 (0.0947) loss: 0.8296 (0.8309) time: 0.1471 data: 0.0533 max mem: 9377 +Train: [45] [ 200/6250] eta: 0:17:46 lr: 0.000078 grad: 0.0805 (0.0905) loss: 0.8292 (0.8288) time: 0.1584 data: 0.0652 max mem: 9377 +Train: [45] [ 300/6250] eta: 0:16:17 lr: 0.000078 grad: 0.0762 (0.0867) loss: 0.8178 (0.8268) time: 0.1291 data: 0.0493 max mem: 9377 +Train: [45] [ 400/6250] eta: 0:15:29 lr: 0.000078 grad: 0.0742 (0.0846) loss: 0.8253 (0.8262) time: 0.1397 data: 0.0512 max mem: 9377 +Train: [45] [ 500/6250] eta: 0:14:52 lr: 0.000078 grad: 0.0739 (0.0834) loss: 0.8250 (0.8265) time: 0.1342 data: 0.0465 max mem: 9377 +Train: [45] [ 600/6250] eta: 0:14:26 lr: 0.000078 grad: 0.0817 (0.0830) loss: 0.8300 (0.8266) time: 0.1558 data: 0.0669 max mem: 9377 +Train: [45] [ 700/6250] eta: 0:14:01 lr: 0.000078 grad: 0.0792 (0.0830) loss: 0.8233 (0.8267) time: 0.1477 data: 0.0605 max mem: 9377 +Train: [45] [ 800/6250] eta: 0:13:40 lr: 0.000078 grad: 0.0810 (0.0829) loss: 0.8215 (0.8266) time: 0.1414 data: 0.0525 max mem: 9377 +Train: [45] [ 900/6250] eta: 0:13:26 lr: 0.000078 grad: 0.0795 (0.0827) loss: 0.8256 (0.8263) time: 0.1503 data: 0.0638 max mem: 9377 +Train: [45] [1000/6250] eta: 0:13:14 lr: 0.000078 grad: 0.0797 (0.0829) loss: 0.8206 (0.8259) time: 0.1519 data: 0.0675 max mem: 9377 +Train: [45] [1100/6250] eta: 0:13:01 lr: 0.000077 grad: 0.0788 (0.0830) loss: 0.8248 (0.8258) time: 0.1571 data: 0.0731 max mem: 9377 +Train: [45] [1200/6250] eta: 0:12:48 lr: 0.000077 grad: 0.0827 (0.0831) loss: 0.8210 (0.8254) time: 0.1251 data: 0.0406 max mem: 9377 +Train: [45] [1300/6250] eta: 0:12:36 lr: 0.000077 grad: 0.0768 (0.0835) loss: 0.8186 (0.8251) time: 0.1638 data: 0.0842 max mem: 9377 +Train: [45] [1400/6250] eta: 0:12:21 lr: 0.000077 grad: 0.0850 (0.0836) loss: 0.8168 (0.8248) time: 0.1572 data: 0.0813 max mem: 9377 +Train: [45] [1500/6250] eta: 0:12:06 lr: 0.000077 grad: 0.0789 (0.0837) loss: 0.8199 (0.8245) time: 0.1601 data: 0.0788 max mem: 9377 +Train: [45] [1600/6250] eta: 0:11:57 lr: 0.000077 grad: 0.0818 (0.0837) loss: 0.8267 (0.8243) time: 0.1695 data: 0.0794 max mem: 9377 +Train: [45] [1700/6250] eta: 0:11:40 lr: 0.000077 grad: 0.0833 (0.0840) loss: 0.8154 (0.8239) time: 0.1575 data: 0.0757 max mem: 9377 +Train: [45] [1800/6250] eta: 0:11:23 lr: 0.000077 grad: 0.0844 (0.0842) loss: 0.8155 (0.8236) time: 0.1433 data: 0.0639 max mem: 9377 +Train: [45] [1900/6250] eta: 0:11:07 lr: 0.000077 grad: 0.0823 (0.0844) loss: 0.8142 (0.8232) time: 0.1718 data: 0.0875 max mem: 9377 +Train: [45] [2000/6250] eta: 0:10:52 lr: 0.000077 grad: 0.0851 (0.0847) loss: 0.8212 (0.8228) time: 0.1494 data: 0.0588 max mem: 9377 +Train: [45] [2100/6250] eta: 0:10:37 lr: 0.000077 grad: 0.0825 (0.0849) loss: 0.8141 (0.8225) time: 0.1534 data: 0.0618 max mem: 9377 +Train: [45] [2200/6250] eta: 0:10:19 lr: 0.000077 grad: 0.0866 (0.0851) loss: 0.8108 (0.8221) time: 0.1386 data: 0.0559 max mem: 9377 +Train: [45] [2300/6250] eta: 0:10:01 lr: 0.000077 grad: 0.0894 (0.0859) loss: 0.8094 (0.8217) time: 0.1385 data: 0.0557 max mem: 9377 +Train: [45] [2400/6250] eta: 0:09:44 lr: 0.000077 grad: 0.0920 (0.0861) loss: 0.8058 (0.8214) time: 0.1323 data: 0.0471 max mem: 9377 +Train: [45] [2500/6250] eta: 0:09:27 lr: 0.000077 grad: 0.0872 (0.0863) loss: 0.8145 (0.8211) time: 0.1606 data: 0.0813 max mem: 9377 +Train: [45] [2600/6250] eta: 0:09:12 lr: 0.000077 grad: 0.0892 (0.0864) loss: 0.8184 (0.8208) time: 0.1554 data: 0.0757 max mem: 9377 +Train: [45] [2700/6250] eta: 0:08:58 lr: 0.000077 grad: 0.0873 (0.0865) loss: 0.8149 (0.8207) time: 0.1646 data: 0.0867 max mem: 9377 +Train: [45] [2800/6250] eta: 0:08:44 lr: 0.000077 grad: 0.0783 (0.0865) loss: 0.8225 (0.8206) time: 0.1471 data: 0.0600 max mem: 9377 +Train: [45] [2900/6250] eta: 0:08:29 lr: 0.000077 grad: 0.0805 (0.0865) loss: 0.8194 (0.8204) time: 0.1443 data: 0.0623 max mem: 9377 +Train: [45] [3000/6250] eta: 0:08:14 lr: 0.000077 grad: 0.0856 (0.0865) loss: 0.8159 (0.8203) time: 0.1652 data: 0.0883 max mem: 9377 +Train: [45] [3100/6250] eta: 0:07:58 lr: 0.000077 grad: 0.0872 (0.0867) loss: 0.8126 (0.8201) time: 0.1386 data: 0.0513 max mem: 9377 +Train: [45] [3200/6250] eta: 0:07:44 lr: 0.000077 grad: 0.0906 (0.0868) loss: 0.8181 (0.8200) time: 0.2451 data: 0.1708 max mem: 9377 +Train: [45] [3300/6250] eta: 0:07:32 lr: 0.000077 grad: 0.0837 (0.0870) loss: 0.8169 (0.8199) time: 0.1687 data: 0.0889 max mem: 9377 +Train: [45] [3400/6250] eta: 0:07:18 lr: 0.000077 grad: 0.0872 (0.0871) loss: 0.8172 (0.8198) time: 0.1747 data: 0.0820 max mem: 9377 +Train: [45] [3500/6250] eta: 0:07:04 lr: 0.000077 grad: 0.0867 (0.0872) loss: 0.8154 (0.8197) time: 0.1605 data: 0.0813 max mem: 9377 +Train: [45] [3600/6250] eta: 0:06:50 lr: 0.000077 grad: 0.0812 (0.0873) loss: 0.8184 (0.8197) time: 0.1522 data: 0.0547 max mem: 9377 +Train: [45] [3700/6250] eta: 0:06:35 lr: 0.000077 grad: 0.0849 (0.0873) loss: 0.8135 (0.8195) time: 0.1537 data: 0.0729 max mem: 9377 +Train: [45] [3800/6250] eta: 0:06:19 lr: 0.000077 grad: 0.0935 (0.0874) loss: 0.8132 (0.8194) time: 0.1618 data: 0.0767 max mem: 9377 +Train: [45] [3900/6250] eta: 0:06:04 lr: 0.000077 grad: 0.0940 (0.0878) loss: 0.8150 (0.8193) time: 0.1456 data: 0.0574 max mem: 9377 +Train: [45] [4000/6250] eta: 0:05:47 lr: 0.000077 grad: 0.0914 (0.0878) loss: 0.8189 (0.8192) time: 0.1438 data: 0.0572 max mem: 9377 +Train: [45] [4100/6250] eta: 0:05:32 lr: 0.000077 grad: 0.0830 (0.0878) loss: 0.8142 (0.8191) time: 0.1615 data: 0.0814 max mem: 9377 +Train: [45] [4200/6250] eta: 0:05:16 lr: 0.000076 grad: 0.0882 (0.0879) loss: 0.8151 (0.8190) time: 0.1826 data: 0.0959 max mem: 9377 +Train: [45] [4300/6250] eta: 0:05:00 lr: 0.000076 grad: 0.0924 (0.0879) loss: 0.8132 (0.8190) time: 0.1527 data: 0.0626 max mem: 9377 +Train: [45] [4400/6250] eta: 0:04:44 lr: 0.000076 grad: 0.0845 (0.0879) loss: 0.8200 (0.8189) time: 0.1339 data: 0.0501 max mem: 9377 +Train: [45] [4500/6250] eta: 0:04:28 lr: 0.000076 grad: 0.0900 (0.0880) loss: 0.8232 (0.8189) time: 0.1311 data: 0.0406 max mem: 9377 +Train: [45] [4600/6250] eta: 0:04:13 lr: 0.000076 grad: 0.0896 (0.0880) loss: 0.8193 (0.8189) time: 0.1594 data: 0.0774 max mem: 9377 +Train: [45] [4700/6250] eta: 0:03:57 lr: 0.000076 grad: 0.0829 (0.0879) loss: 0.8170 (0.8189) time: 0.1519 data: 0.0717 max mem: 9377 +Train: [45] [4800/6250] eta: 0:03:41 lr: 0.000076 grad: 0.0807 (0.0879) loss: 0.8203 (0.8189) time: 0.1511 data: 0.0724 max mem: 9377 +Train: [45] [4900/6250] eta: 0:03:26 lr: 0.000076 grad: 0.0822 (0.0878) loss: 0.8196 (0.8189) time: 0.1528 data: 0.0761 max mem: 9377 +Train: [45] [5000/6250] eta: 0:03:10 lr: 0.000076 grad: 0.0882 (0.0878) loss: 0.8239 (0.8190) time: 0.1466 data: 0.0656 max mem: 9377 +Train: [45] [5100/6250] eta: 0:02:55 lr: 0.000076 grad: 0.0897 (0.0879) loss: 0.8181 (0.8190) time: 0.1366 data: 0.0590 max mem: 9377 +Train: [45] [5200/6250] eta: 0:02:39 lr: 0.000076 grad: 0.0887 (0.0879) loss: 0.8199 (0.8190) time: 0.1566 data: 0.0838 max mem: 9377 +Train: [45] [5300/6250] eta: 0:02:24 lr: 0.000076 grad: 0.0867 (0.0878) loss: 0.8145 (0.8190) time: 0.1246 data: 0.0388 max mem: 9377 +Train: [45] [5400/6250] eta: 0:02:08 lr: 0.000076 grad: 0.0844 (0.0879) loss: 0.8194 (0.8191) time: 0.1283 data: 0.0469 max mem: 9377 +Train: [45] [5500/6250] eta: 0:01:53 lr: 0.000076 grad: 0.0888 (0.0879) loss: 0.8205 (0.8191) time: 0.1545 data: 0.0740 max mem: 9377 +Train: [45] [5600/6250] eta: 0:01:38 lr: 0.000076 grad: 0.0894 (0.0879) loss: 0.8206 (0.8192) time: 0.1394 data: 0.0598 max mem: 9377 +Train: [45] [5700/6250] eta: 0:01:23 lr: 0.000076 grad: 0.0838 (0.0879) loss: 0.8197 (0.8192) time: 0.1520 data: 0.0681 max mem: 9377 +Train: [45] [5800/6250] eta: 0:01:08 lr: 0.000076 grad: 0.0893 (0.0879) loss: 0.8172 (0.8192) time: 0.1177 data: 0.0390 max mem: 9377 +Train: [45] [5900/6250] eta: 0:00:52 lr: 0.000076 grad: 0.0887 (0.0879) loss: 0.8182 (0.8192) time: 0.1373 data: 0.0589 max mem: 9377 +Train: [45] [6000/6250] eta: 0:00:37 lr: 0.000076 grad: 0.0832 (0.0880) loss: 0.8112 (0.8191) time: 0.1422 data: 0.0621 max mem: 9377 +Train: [45] [6100/6250] eta: 0:00:22 lr: 0.000076 grad: 0.0844 (0.0880) loss: 0.8193 (0.8191) time: 0.1554 data: 0.0695 max mem: 9377 +Train: [45] [6200/6250] eta: 0:00:07 lr: 0.000076 grad: 0.0886 (0.0880) loss: 0.8224 (0.8191) time: 0.1459 data: 0.0653 max mem: 9377 +Train: [45] [6249/6250] eta: 0:00:00 lr: 0.000076 grad: 0.0836 (0.0880) loss: 0.8255 (0.8191) time: 0.1378 data: 0.0590 max mem: 9377 +Train: [45] Total time: 0:15:48 (0.1518 s / it) +Averaged stats: lr: 0.000076 grad: 0.0836 (0.0880) loss: 0.8255 (0.8191) +Eval (hcp-train-subset): [45] [ 0/62] eta: 0:06:32 loss: 0.8356 (0.8356) time: 6.3377 data: 6.3064 max mem: 9377 +Eval (hcp-train-subset): [45] [61/62] eta: 0:00:00 loss: 0.8342 (0.8346) time: 0.1390 data: 0.1137 max mem: 9377 +Eval (hcp-train-subset): [45] Total time: 0:00:14 (0.2365 s / it) +Averaged stats (hcp-train-subset): loss: 0.8342 (0.8346) +Eval (hcp-val): [45] [ 0/62] eta: 0:05:20 loss: 0.8354 (0.8354) time: 5.1695 data: 5.1328 max mem: 9377 +Eval (hcp-val): [45] [61/62] eta: 0:00:00 loss: 0.8373 (0.8380) time: 0.1096 data: 0.0847 max mem: 9377 +Eval (hcp-val): [45] Total time: 0:00:13 (0.2121 s / it) +Averaged stats (hcp-val): loss: 0.8373 (0.8380) +Eval (nsd-val): [45] [ 0/62] eta: 0:05:24 loss: 0.7981 (0.7981) time: 5.2280 data: 5.1970 max mem: 9377 +Eval (nsd-val): [45] [61/62] eta: 0:00:00 loss: 0.8058 (0.8078) time: 0.1417 data: 0.1146 max mem: 9377 +Eval (nsd-val): [45] Total time: 0:00:13 (0.2161 s / it) +Averaged stats (nsd-val): loss: 0.8058 (0.8078) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +Train: [46] [ 0/6250] eta: 8:51:01 lr: 0.000076 grad: nan (nan) loss: 0.8671 (0.8671) time: 5.0978 data: 4.9290 max mem: 9377 +Train: [46] [ 100/6250] eta: 0:20:31 lr: 0.000076 grad: 0.0850 (0.0914) loss: 0.8394 (0.8410) time: 0.1485 data: 0.0479 max mem: 9377 +Train: [46] [ 200/6250] eta: 0:17:22 lr: 0.000076 grad: 0.0801 (0.0885) loss: 0.8305 (0.8367) time: 0.1299 data: 0.0463 max mem: 9377 +Train: [46] [ 300/6250] eta: 0:16:08 lr: 0.000076 grad: 0.0797 (0.0865) loss: 0.8343 (0.8348) time: 0.1481 data: 0.0587 max mem: 9377 +Train: [46] [ 400/6250] eta: 0:15:14 lr: 0.000076 grad: 0.0825 (0.0862) loss: 0.8261 (0.8328) time: 0.1367 data: 0.0392 max mem: 9377 +Train: [46] [ 500/6250] eta: 0:14:45 lr: 0.000076 grad: 0.0804 (0.0859) loss: 0.8288 (0.8310) time: 0.1369 data: 0.0531 max mem: 9377 +Train: [46] [ 600/6250] eta: 0:14:28 lr: 0.000076 grad: 0.0790 (0.0853) loss: 0.8212 (0.8301) time: 0.1691 data: 0.0866 max mem: 9377 +Train: [46] [ 700/6250] eta: 0:14:00 lr: 0.000076 grad: 0.0814 (0.0847) loss: 0.8272 (0.8298) time: 0.1573 data: 0.0690 max mem: 9377 +Train: [46] [ 800/6250] eta: 0:13:41 lr: 0.000076 grad: 0.0770 (0.0844) loss: 0.8298 (0.8293) time: 0.1671 data: 0.0750 max mem: 9377 +Train: [46] [ 900/6250] eta: 0:13:29 lr: 0.000076 grad: 0.0806 (0.0842) loss: 0.8257 (0.8289) time: 0.1647 data: 0.0765 max mem: 9377 +Train: [46] [1000/6250] eta: 0:13:12 lr: 0.000076 grad: 0.0840 (0.0843) loss: 0.8270 (0.8285) time: 0.1750 data: 0.0937 max mem: 9377 +Train: [46] [1100/6250] eta: 0:12:53 lr: 0.000075 grad: 0.0830 (0.0846) loss: 0.8264 (0.8279) time: 0.1374 data: 0.0605 max mem: 9377 +Train: [46] [1200/6250] eta: 0:12:34 lr: 0.000075 grad: 0.0825 (0.0848) loss: 0.8227 (0.8274) time: 0.1317 data: 0.0451 max mem: 9377 +Train: [46] [1300/6250] eta: 0:12:26 lr: 0.000075 grad: 0.0863 (0.0849) loss: 0.8231 (0.8270) time: 0.1747 data: 0.0842 max mem: 9377 +Train: [46] [1400/6250] eta: 0:12:13 lr: 0.000075 grad: 0.0826 (0.0850) loss: 0.8201 (0.8265) time: 0.1628 data: 0.0708 max mem: 9377 +Train: [46] [1500/6250] eta: 0:11:59 lr: 0.000075 grad: 0.0857 (0.0853) loss: 0.8211 (0.8262) time: 0.1621 data: 0.0760 max mem: 9377 +Train: [46] [1600/6250] eta: 0:11:42 lr: 0.000075 grad: 0.0932 (0.0856) loss: 0.8199 (0.8259) time: 0.1509 data: 0.0652 max mem: 9377 +Train: [46] [1700/6250] eta: 0:11:25 lr: 0.000075 grad: 0.0849 (0.0859) loss: 0.8217 (0.8255) time: 0.1169 data: 0.0363 max mem: 9377 +Train: [46] [1800/6250] eta: 0:11:07 lr: 0.000075 grad: 0.0849 (0.0859) loss: 0.8210 (0.8253) time: 0.1478 data: 0.0615 max mem: 9377 +Train: [46] [1900/6250] eta: 0:10:52 lr: 0.000075 grad: 0.0928 (0.0860) loss: 0.8199 (0.8250) time: 0.1265 data: 0.0402 max mem: 9377 +Train: [46] [2000/6250] eta: 0:10:35 lr: 0.000075 grad: 0.0908 (0.0862) loss: 0.8214 (0.8247) time: 0.1497 data: 0.0675 max mem: 9377 +Train: [46] [2100/6250] eta: 0:10:17 lr: 0.000075 grad: 0.0834 (0.0864) loss: 0.8235 (0.8245) time: 0.1400 data: 0.0506 max mem: 9377 +Train: [46] [2200/6250] eta: 0:10:00 lr: 0.000075 grad: 0.0805 (0.0865) loss: 0.8268 (0.8243) time: 0.1545 data: 0.0691 max mem: 9377 +Train: [46] [2300/6250] eta: 0:09:44 lr: 0.000075 grad: 0.0851 (0.0865) loss: 0.8257 (0.8242) time: 0.1283 data: 0.0498 max mem: 9377 +Train: [46] [2400/6250] eta: 0:09:27 lr: 0.000075 grad: 0.0813 (0.0867) loss: 0.8245 (0.8240) time: 0.1200 data: 0.0271 max mem: 9377 +Train: [46] [2500/6250] eta: 0:09:12 lr: 0.000075 grad: 0.0837 (0.0868) loss: 0.8237 (0.8239) time: 0.1553 data: 0.0692 max mem: 9377 +Train: [46] [2600/6250] eta: 0:08:57 lr: 0.000075 grad: 0.0847 (0.0868) loss: 0.8244 (0.8239) time: 0.1308 data: 0.0481 max mem: 9377 +Train: [46] [2700/6250] eta: 0:08:43 lr: 0.000075 grad: 0.0858 (0.0868) loss: 0.8163 (0.8238) time: 0.1508 data: 0.0678 max mem: 9377 +Train: [46] [2800/6250] eta: 0:08:28 lr: 0.000075 grad: 0.0878 (0.0868) loss: 0.8180 (0.8238) time: 0.1543 data: 0.0700 max mem: 9377 +Train: [46] [2900/6250] eta: 0:08:13 lr: 0.000075 grad: 0.0839 (0.0869) loss: 0.8196 (0.8236) time: 0.1515 data: 0.0664 max mem: 9377 +Train: [46] [3000/6250] eta: 0:08:02 lr: 0.000075 grad: 0.0850 (0.0869) loss: 0.8187 (0.8235) time: 0.1644 data: 0.0720 max mem: 9377 +Train: [46] [3100/6250] eta: 0:07:48 lr: 0.000075 grad: 0.0811 (0.0869) loss: 0.8216 (0.8234) time: 0.1551 data: 0.0758 max mem: 9377 +Train: [46] [3200/6250] eta: 0:07:35 lr: 0.000075 grad: 0.0897 (0.0869) loss: 0.8197 (0.8233) time: 0.1704 data: 0.0889 max mem: 9377 +Train: [46] [3300/6250] eta: 0:07:20 lr: 0.000075 grad: 0.0840 (0.0868) loss: 0.8164 (0.8231) time: 0.1747 data: 0.0983 max mem: 9377 +Train: [46] [3400/6250] eta: 0:07:05 lr: 0.000075 grad: 0.0834 (0.0869) loss: 0.8182 (0.8230) time: 0.1513 data: 0.0676 max mem: 9377 +Train: [46] [3500/6250] eta: 0:06:50 lr: 0.000075 grad: 0.0802 (0.0869) loss: 0.8180 (0.8230) time: 0.1345 data: 0.0440 max mem: 9377 +Train: [46] [3600/6250] eta: 0:06:34 lr: 0.000075 grad: 0.0879 (0.0869) loss: 0.8148 (0.8229) time: 0.1436 data: 0.0615 max mem: 9377 +Train: [46] [3700/6250] eta: 0:06:18 lr: 0.000075 grad: 0.0875 (0.0870) loss: 0.8245 (0.8228) time: 0.1227 data: 0.0330 max mem: 9377 +Train: [46] [3800/6250] eta: 0:06:02 lr: 0.000075 grad: 0.0858 (0.0870) loss: 0.8216 (0.8227) time: 0.1371 data: 0.0538 max mem: 9377 +Train: [46] [3900/6250] eta: 0:05:47 lr: 0.000075 grad: 0.0824 (0.0871) loss: 0.8194 (0.8226) time: 0.1446 data: 0.0583 max mem: 9377 +Train: [46] [4000/6250] eta: 0:05:32 lr: 0.000075 grad: 0.0828 (0.0871) loss: 0.8190 (0.8224) time: 0.1582 data: 0.0740 max mem: 9377 +Train: [46] [4100/6250] eta: 0:05:17 lr: 0.000075 grad: 0.0844 (0.0872) loss: 0.8182 (0.8223) time: 0.1451 data: 0.0642 max mem: 9377 +Train: [46] [4200/6250] eta: 0:05:02 lr: 0.000074 grad: 0.0816 (0.0872) loss: 0.8222 (0.8223) time: 0.1621 data: 0.0819 max mem: 9377 +Train: [46] [4300/6250] eta: 0:04:47 lr: 0.000074 grad: 0.0884 (0.0872) loss: 0.8207 (0.8222) time: 0.1095 data: 0.0268 max mem: 9377 +Train: [46] [4400/6250] eta: 0:04:32 lr: 0.000074 grad: 0.0901 (0.0873) loss: 0.8215 (0.8222) time: 0.1412 data: 0.0538 max mem: 9377 +Train: [46] [4500/6250] eta: 0:04:16 lr: 0.000074 grad: 0.0852 (0.0874) loss: 0.8239 (0.8221) time: 0.1290 data: 0.0424 max mem: 9377 +Train: [46] [4600/6250] eta: 0:04:02 lr: 0.000074 grad: 0.0874 (0.0875) loss: 0.8192 (0.8220) time: 0.1255 data: 0.0421 max mem: 9377 +Train: [46] [4700/6250] eta: 0:03:47 lr: 0.000074 grad: 0.0853 (0.0876) loss: 0.8198 (0.8219) time: 0.1406 data: 0.0495 max mem: 9377 +Train: [46] [4800/6250] eta: 0:03:32 lr: 0.000074 grad: 0.0849 (0.0877) loss: 0.8175 (0.8218) time: 0.1431 data: 0.0585 max mem: 9377 +Train: [46] [4900/6250] eta: 0:03:17 lr: 0.000074 grad: 0.0849 (0.0878) loss: 0.8165 (0.8217) time: 0.1295 data: 0.0520 max mem: 9377 +Train: [46] [5000/6250] eta: 0:03:02 lr: 0.000074 grad: 0.0857 (0.0879) loss: 0.8244 (0.8217) time: 0.1362 data: 0.0556 max mem: 9377 +Train: [46] [5100/6250] eta: 0:02:47 lr: 0.000074 grad: 0.0938 (0.0879) loss: 0.8131 (0.8216) time: 0.1298 data: 0.0485 max mem: 9377 +Train: [46] [5200/6250] eta: 0:02:32 lr: 0.000074 grad: 0.0807 (0.0880) loss: 0.8174 (0.8215) time: 0.1448 data: 0.0646 max mem: 9377 +Train: [46] [5300/6250] eta: 0:02:18 lr: 0.000074 grad: 0.0853 (0.0880) loss: 0.8200 (0.8215) time: 0.1427 data: 0.0549 max mem: 9377 +Train: [46] [5400/6250] eta: 0:02:03 lr: 0.000074 grad: 0.0891 (0.0880) loss: 0.8184 (0.8214) time: 0.1381 data: 0.0548 max mem: 9377 +Train: [46] [5500/6250] eta: 0:01:48 lr: 0.000074 grad: 0.0869 (0.0881) loss: 0.8220 (0.8214) time: 0.1338 data: 0.0478 max mem: 9377 +Train: [46] [5600/6250] eta: 0:01:34 lr: 0.000074 grad: 0.0939 (0.0881) loss: 0.8151 (0.8213) time: 0.1305 data: 0.0422 max mem: 9377 +Train: [46] [5700/6250] eta: 0:01:19 lr: 0.000074 grad: 0.0860 (0.0882) loss: 0.8212 (0.8213) time: 0.1266 data: 0.0408 max mem: 9377 +Train: [46] [5800/6250] eta: 0:01:05 lr: 0.000074 grad: 0.0881 (0.0882) loss: 0.8195 (0.8213) time: 0.1781 data: 0.0981 max mem: 9377 +Train: [46] [5900/6250] eta: 0:00:50 lr: 0.000074 grad: 0.0838 (0.0882) loss: 0.8222 (0.8213) time: 0.1283 data: 0.0547 max mem: 9377 +Train: [46] [6000/6250] eta: 0:00:36 lr: 0.000074 grad: 0.0827 (0.0882) loss: 0.8237 (0.8213) time: 0.1277 data: 0.0440 max mem: 9377 +Train: [46] [6100/6250] eta: 0:00:21 lr: 0.000074 grad: 0.0884 (0.0882) loss: 0.8232 (0.8213) time: 0.1368 data: 0.0568 max mem: 9377 +Train: [46] [6200/6250] eta: 0:00:07 lr: 0.000074 grad: 0.0921 (0.0883) loss: 0.8147 (0.8213) time: 0.1368 data: 0.0613 max mem: 9377 +Train: [46] [6249/6250] eta: 0:00:00 lr: 0.000074 grad: 0.0849 (0.0883) loss: 0.8203 (0.8212) time: 0.1498 data: 0.0698 max mem: 9377 +Train: [46] Total time: 0:15:08 (0.1453 s / it) +Averaged stats: lr: 0.000074 grad: 0.0849 (0.0883) loss: 0.8203 (0.8212) +Eval (hcp-train-subset): [46] [ 0/62] eta: 0:05:01 loss: 0.8335 (0.8335) time: 4.8571 data: 4.8250 max mem: 9377 +Eval (hcp-train-subset): [46] [61/62] eta: 0:00:00 loss: 0.8323 (0.8336) time: 0.1336 data: 0.1083 max mem: 9377 +Eval (hcp-train-subset): [46] Total time: 0:00:14 (0.2310 s / it) +Averaged stats (hcp-train-subset): loss: 0.8323 (0.8336) +Eval (hcp-val): [46] [ 0/62] eta: 0:03:17 loss: 0.8332 (0.8332) time: 3.1872 data: 3.1180 max mem: 9377 +Eval (hcp-val): [46] [61/62] eta: 0:00:00 loss: 0.8364 (0.8380) time: 0.1246 data: 0.0972 max mem: 9377 +Eval (hcp-val): [46] Total time: 0:00:13 (0.2188 s / it) +Averaged stats (hcp-val): loss: 0.8364 (0.8380) +Eval (nsd-val): [46] [ 0/62] eta: 0:04:24 loss: 0.7976 (0.7976) time: 4.2586 data: 4.1961 max mem: 9377 +Eval (nsd-val): [46] [61/62] eta: 0:00:00 loss: 0.8097 (0.8102) time: 0.1347 data: 0.1080 max mem: 9377 +Eval (nsd-val): [46] Total time: 0:00:13 (0.2101 s / it) +Averaged stats (nsd-val): loss: 0.8097 (0.8102) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +Train: [47] [ 0/6250] eta: 10:34:50 lr: 0.000074 grad: 0.2274 (0.2274) loss: 0.8235 (0.8235) time: 6.0945 data: 5.9643 max mem: 9377 +Train: [47] [ 100/6250] eta: 0:21:03 lr: 0.000074 grad: 0.0807 (0.1048) loss: 0.8200 (0.8246) time: 0.1803 data: 0.0897 max mem: 9377 +Train: [47] [ 200/6250] eta: 0:17:45 lr: 0.000074 grad: 0.0785 (0.0948) loss: 0.8290 (0.8244) time: 0.1594 data: 0.0760 max mem: 9377 +Train: [47] [ 300/6250] eta: 0:16:49 lr: 0.000074 grad: 0.0835 (0.0919) loss: 0.8275 (0.8250) time: 0.1682 data: 0.0795 max mem: 9377 +Train: [47] [ 400/6250] eta: 0:15:54 lr: 0.000074 grad: 0.0859 (0.0906) loss: 0.8155 (0.8243) time: 0.1546 data: 0.0537 max mem: 9377 +Train: [47] [ 500/6250] eta: 0:15:07 lr: 0.000074 grad: 0.0823 (0.0906) loss: 0.8173 (0.8226) time: 0.1404 data: 0.0545 max mem: 9377 +Train: [47] [ 600/6250] eta: 0:14:33 lr: 0.000074 grad: 0.0842 (0.0904) loss: 0.8238 (0.8216) time: 0.1194 data: 0.0284 max mem: 9377 +Train: [47] [ 700/6250] eta: 0:14:20 lr: 0.000074 grad: 0.0845 (0.0902) loss: 0.8176 (0.8209) time: 0.1580 data: 0.0757 max mem: 9377 +Train: [47] [ 800/6250] eta: 0:14:00 lr: 0.000074 grad: 0.0836 (0.0901) loss: 0.8219 (0.8208) time: 0.1493 data: 0.0652 max mem: 9377 +Train: [47] [ 900/6250] eta: 0:13:48 lr: 0.000074 grad: 0.0846 (0.0898) loss: 0.8176 (0.8204) time: 0.1705 data: 0.0902 max mem: 9377 +Train: [47] [1000/6250] eta: 0:13:40 lr: 0.000073 grad: 0.0815 (0.0892) loss: 0.8242 (0.8205) time: 0.1979 data: 0.1164 max mem: 9377 +Train: [47] [1100/6250] eta: 0:13:29 lr: 0.000073 grad: 0.0820 (0.0893) loss: 0.8205 (0.8204) time: 0.1603 data: 0.0770 max mem: 9377 +Train: [47] [1200/6250] eta: 0:13:15 lr: 0.000073 grad: 0.0868 (0.0892) loss: 0.8147 (0.8204) time: 0.1642 data: 0.0779 max mem: 9377 +Train: [47] [1300/6250] eta: 0:12:59 lr: 0.000073 grad: 0.0876 (0.0891) loss: 0.8155 (0.8202) time: 0.1651 data: 0.0845 max mem: 9377 +Train: [47] [1400/6250] eta: 0:12:41 lr: 0.000073 grad: 0.0863 (0.0891) loss: 0.8179 (0.8202) time: 0.1555 data: 0.0737 max mem: 9377 +Train: [47] [1500/6250] eta: 0:12:20 lr: 0.000073 grad: 0.0818 (0.0892) loss: 0.8199 (0.8201) time: 0.1363 data: 0.0518 max mem: 9377 +Train: [47] [1600/6250] eta: 0:12:02 lr: 0.000073 grad: 0.0857 (0.0891) loss: 0.8184 (0.8199) time: 0.1496 data: 0.0625 max mem: 9377 +Train: [47] [1700/6250] eta: 0:11:45 lr: 0.000073 grad: 0.0860 (0.0893) loss: 0.8138 (0.8197) time: 0.1380 data: 0.0584 max mem: 9377 +Train: [47] [1800/6250] eta: 0:11:25 lr: 0.000073 grad: 0.0886 (0.0892) loss: 0.8178 (0.8197) time: 0.1258 data: 0.0389 max mem: 9377 +Train: [47] [1900/6250] eta: 0:11:07 lr: 0.000073 grad: 0.0919 (0.0891) loss: 0.8175 (0.8197) time: 0.1515 data: 0.0695 max mem: 9377 +Train: [47] [2000/6250] eta: 0:10:53 lr: 0.000073 grad: 0.0822 (0.0891) loss: 0.8227 (0.8198) time: 0.1710 data: 0.0913 max mem: 9377 +Train: [47] [2100/6250] eta: 0:10:34 lr: 0.000073 grad: 0.0817 (0.0891) loss: 0.8206 (0.8199) time: 0.1488 data: 0.0690 max mem: 9377 +Train: [47] [2200/6250] eta: 0:10:19 lr: 0.000073 grad: 0.0829 (0.0889) loss: 0.8203 (0.8201) time: 0.1975 data: 0.0504 max mem: 9377 +Train: [47] [2300/6250] eta: 0:10:04 lr: 0.000073 grad: 0.0764 (0.0888) loss: 0.8255 (0.8203) time: 0.1307 data: 0.0432 max mem: 9377 +Train: [47] [2400/6250] eta: 0:09:50 lr: 0.000073 grad: 0.0857 (0.0886) loss: 0.8259 (0.8205) time: 0.1770 data: 0.0958 max mem: 9377 +Train: [47] [2500/6250] eta: 0:09:36 lr: 0.000073 grad: 0.0841 (0.0886) loss: 0.8208 (0.8206) time: 0.1627 data: 0.0726 max mem: 9377 +Train: [47] [2600/6250] eta: 0:09:22 lr: 0.000073 grad: 0.0818 (0.0885) loss: 0.8262 (0.8206) time: 0.1665 data: 0.0852 max mem: 9377 +Train: [47] [2700/6250] eta: 0:09:10 lr: 0.000073 grad: 0.0857 (0.0885) loss: 0.8178 (0.8206) time: 0.1370 data: 0.0480 max mem: 9377 +Train: [47] [2800/6250] eta: 0:08:55 lr: 0.000073 grad: 0.0875 (0.0885) loss: 0.8163 (0.8205) time: 0.1629 data: 0.0814 max mem: 9377 +Train: [47] [2900/6250] eta: 0:08:40 lr: 0.000073 grad: 0.0895 (0.0887) loss: 0.8133 (0.8205) time: 0.1530 data: 0.0772 max mem: 9377 +Train: [47] [3000/6250] eta: 0:08:24 lr: 0.000073 grad: 0.0865 (0.0893) loss: 0.8192 (0.8204) time: 0.1551 data: 0.0722 max mem: 9377 +Train: [47] [3100/6250] eta: 0:08:08 lr: 0.000073 grad: 0.0836 (0.0893) loss: 0.8247 (0.8204) time: 0.1512 data: 0.0629 max mem: 9377 +Train: [47] [3200/6250] eta: 0:07:52 lr: 0.000073 grad: 0.0876 (0.0892) loss: 0.8220 (0.8204) time: 0.1489 data: 0.0646 max mem: 9377 +Train: [47] [3300/6250] eta: 0:07:36 lr: 0.000073 grad: 0.0894 (0.0891) loss: 0.8084 (0.8204) time: 0.1762 data: 0.0908 max mem: 9377 +Train: [47] [3400/6250] eta: 0:07:20 lr: 0.000073 grad: 0.0912 (0.0892) loss: 0.8197 (0.8204) time: 0.1606 data: 0.0794 max mem: 9377 +Train: [47] [3500/6250] eta: 0:07:03 lr: 0.000073 grad: 0.0886 (0.0892) loss: 0.8213 (0.8204) time: 0.1739 data: 0.0904 max mem: 9377 +Train: [47] [3600/6250] eta: 0:06:47 lr: 0.000073 grad: 0.0946 (0.0892) loss: 0.8099 (0.8203) time: 0.1610 data: 0.0791 max mem: 9377 +Train: [47] [3700/6250] eta: 0:06:34 lr: 0.000073 grad: 0.0901 (0.0892) loss: 0.8177 (0.8203) time: 0.2025 data: 0.1134 max mem: 9377 +Train: [47] [3800/6250] eta: 0:06:20 lr: 0.000073 grad: 0.0854 (0.0891) loss: 0.8187 (0.8203) time: 0.2002 data: 0.1206 max mem: 9377 +Train: [47] [3900/6250] eta: 0:06:06 lr: 0.000073 grad: 0.0818 (0.0890) loss: 0.8161 (0.8202) time: 0.1729 data: 0.0951 max mem: 9377 +Train: [47] [4000/6250] eta: 0:05:52 lr: 0.000073 grad: 0.0833 (0.0890) loss: 0.8250 (0.8202) time: 0.1732 data: 0.0906 max mem: 9377 +Train: [47] [4100/6250] eta: 0:05:37 lr: 0.000072 grad: 0.0895 (0.0890) loss: 0.8273 (0.8203) time: 0.1467 data: 0.0669 max mem: 9377 +Train: [47] [4200/6250] eta: 0:05:21 lr: 0.000072 grad: 0.0838 (0.0889) loss: 0.8211 (0.8203) time: 0.1671 data: 0.0878 max mem: 9377 +Train: [47] [4300/6250] eta: 0:05:05 lr: 0.000072 grad: 0.0830 (0.0888) loss: 0.8228 (0.8204) time: 0.1584 data: 0.0742 max mem: 9377 +Train: [47] [4400/6250] eta: 0:04:49 lr: 0.000072 grad: 0.0831 (0.0888) loss: 0.8272 (0.8204) time: 0.1644 data: 0.0864 max mem: 9377 +Train: [47] [4500/6250] eta: 0:04:33 lr: 0.000072 grad: 0.0826 (0.0888) loss: 0.8234 (0.8205) time: 0.1520 data: 0.0674 max mem: 9377 +Train: [47] [4600/6250] eta: 0:04:17 lr: 0.000072 grad: 0.0847 (0.0887) loss: 0.8176 (0.8205) time: 0.1509 data: 0.0698 max mem: 9377 +Train: [47] [4700/6250] eta: 0:04:01 lr: 0.000072 grad: 0.0901 (0.0887) loss: 0.8175 (0.8205) time: 0.1483 data: 0.0658 max mem: 9377 +Train: [47] [4800/6250] eta: 0:03:45 lr: 0.000072 grad: 0.0915 (0.0887) loss: 0.8267 (0.8205) time: 0.1455 data: 0.0637 max mem: 9377 +Train: [47] [4900/6250] eta: 0:03:29 lr: 0.000072 grad: 0.0951 (0.0888) loss: 0.8179 (0.8205) time: 0.1404 data: 0.0593 max mem: 9377 +Train: [47] [5000/6250] eta: 0:03:14 lr: 0.000072 grad: 0.0832 (0.0888) loss: 0.8252 (0.8205) time: 0.1598 data: 0.0775 max mem: 9377 +Train: [47] [5100/6250] eta: 0:02:58 lr: 0.000072 grad: 0.0848 (0.0888) loss: 0.8255 (0.8204) time: 0.1458 data: 0.0653 max mem: 9377 +Train: [47] [5200/6250] eta: 0:02:42 lr: 0.000072 grad: 0.0887 (0.0889) loss: 0.8163 (0.8204) time: 0.1290 data: 0.0485 max mem: 9377 +Train: [47] [5300/6250] eta: 0:02:26 lr: 0.000072 grad: 0.0908 (0.0890) loss: 0.8128 (0.8204) time: 0.1321 data: 0.0474 max mem: 9377 +Train: [47] [5400/6250] eta: 0:02:11 lr: 0.000072 grad: 0.0959 (0.0890) loss: 0.8245 (0.8203) time: 0.1334 data: 0.0482 max mem: 9377 +Train: [47] [5500/6250] eta: 0:01:55 lr: 0.000072 grad: 0.0912 (0.0891) loss: 0.8203 (0.8203) time: 0.1525 data: 0.0672 max mem: 9377 +Train: [47] [5600/6250] eta: 0:01:40 lr: 0.000072 grad: 0.0849 (0.0891) loss: 0.8141 (0.8203) time: 0.1501 data: 0.0682 max mem: 9377 +Train: [47] [5700/6250] eta: 0:01:24 lr: 0.000072 grad: 0.0866 (0.0892) loss: 0.8167 (0.8202) time: 0.1557 data: 0.0768 max mem: 9377 +Train: [47] [5800/6250] eta: 0:01:09 lr: 0.000072 grad: 0.0865 (0.0892) loss: 0.8203 (0.8201) time: 0.1602 data: 0.0809 max mem: 9377 +Train: [47] [5900/6250] eta: 0:00:53 lr: 0.000072 grad: 0.0895 (0.0892) loss: 0.8172 (0.8201) time: 0.1464 data: 0.0610 max mem: 9377 +Train: [47] [6000/6250] eta: 0:00:38 lr: 0.000072 grad: 0.0897 (0.0893) loss: 0.8143 (0.8200) time: 0.1442 data: 0.0662 max mem: 9377 +Train: [47] [6100/6250] eta: 0:00:23 lr: 0.000072 grad: 0.0889 (0.0893) loss: 0.8205 (0.8200) time: 0.1476 data: 0.0530 max mem: 9377 +Train: [47] [6200/6250] eta: 0:00:07 lr: 0.000072 grad: 0.0923 (0.0893) loss: 0.8166 (0.8199) time: 0.1545 data: 0.0722 max mem: 9377 +Train: [47] [6249/6250] eta: 0:00:00 lr: 0.000072 grad: 0.0906 (0.0894) loss: 0.8186 (0.8199) time: 0.1378 data: 0.0638 max mem: 9377 +Train: [47] Total time: 0:16:04 (0.1543 s / it) +Averaged stats: lr: 0.000072 grad: 0.0906 (0.0894) loss: 0.8186 (0.8199) +Eval (hcp-train-subset): [47] [ 0/62] eta: 0:05:23 loss: 0.8309 (0.8309) time: 5.2173 data: 5.1864 max mem: 9377 +Eval (hcp-train-subset): [47] [61/62] eta: 0:00:00 loss: 0.8283 (0.8313) time: 0.1438 data: 0.1183 max mem: 9377 +Eval (hcp-train-subset): [47] Total time: 0:00:14 (0.2339 s / it) +Averaged stats (hcp-train-subset): loss: 0.8283 (0.8313) +Eval (hcp-val): [47] [ 0/62] eta: 0:03:40 loss: 0.8358 (0.8358) time: 3.5531 data: 3.4812 max mem: 9377 +Eval (hcp-val): [47] [61/62] eta: 0:00:00 loss: 0.8352 (0.8355) time: 0.1128 data: 0.0877 max mem: 9377 +Eval (hcp-val): [47] Total time: 0:00:13 (0.2127 s / it) +Averaged stats (hcp-val): loss: 0.8352 (0.8355) +Eval (nsd-val): [47] [ 0/62] eta: 0:04:51 loss: 0.8019 (0.8019) time: 4.7065 data: 4.6763 max mem: 9377 +Eval (nsd-val): [47] [61/62] eta: 0:00:00 loss: 0.8094 (0.8095) time: 0.1291 data: 0.1021 max mem: 9377 +Eval (nsd-val): [47] Total time: 0:00:13 (0.2154 s / it) +Averaged stats (nsd-val): loss: 0.8094 (0.8095) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +Train: [48] [ 0/6250] eta: 8:24:34 lr: 0.000072 grad: 0.1123 (0.1123) loss: 0.8466 (0.8466) time: 4.8438 data: 4.6271 max mem: 9377 +Train: [48] [ 100/6250] eta: 0:19:55 lr: 0.000072 grad: 0.0796 (0.1148) loss: 0.8224 (0.8279) time: 0.1454 data: 0.0479 max mem: 9377 +Train: [48] [ 200/6250] eta: 0:17:14 lr: 0.000072 grad: 0.0906 (0.1136) loss: 0.8104 (0.8213) time: 0.1445 data: 0.0476 max mem: 9377 +Train: [48] [ 300/6250] eta: 0:16:13 lr: 0.000072 grad: 0.0833 (0.1083) loss: 0.8243 (0.8183) time: 0.1592 data: 0.0715 max mem: 9377 +Train: [48] [ 400/6250] eta: 0:15:22 lr: 0.000072 grad: 0.0811 (0.1047) loss: 0.8279 (0.8178) time: 0.1381 data: 0.0463 max mem: 9377 +Train: [48] [ 500/6250] eta: 0:14:50 lr: 0.000072 grad: 0.0855 (0.1019) loss: 0.8258 (0.8180) time: 0.1518 data: 0.0652 max mem: 9377 +Train: [48] [ 600/6250] eta: 0:14:27 lr: 0.000072 grad: 0.0804 (0.0997) loss: 0.8263 (0.8184) time: 0.1648 data: 0.0781 max mem: 9377 +Train: [48] [ 700/6250] eta: 0:14:07 lr: 0.000072 grad: 0.0867 (0.0979) loss: 0.8281 (0.8188) time: 0.1473 data: 0.0553 max mem: 9377 +Train: [48] [ 800/6250] eta: 0:13:49 lr: 0.000072 grad: 0.0819 (0.0963) loss: 0.8254 (0.8193) time: 0.1621 data: 0.0745 max mem: 9377 +Train: [48] [ 900/6250] eta: 0:13:28 lr: 0.000071 grad: 0.0863 (0.0951) loss: 0.8247 (0.8198) time: 0.1341 data: 0.0498 max mem: 9377 +Train: [48] [1000/6250] eta: 0:13:03 lr: 0.000071 grad: 0.0776 (0.0943) loss: 0.8305 (0.8202) time: 0.1463 data: 0.0686 max mem: 9377 +Train: [48] [1100/6250] eta: 0:12:45 lr: 0.000071 grad: 0.0870 (0.0935) loss: 0.8219 (0.8206) time: 0.1523 data: 0.0721 max mem: 9377 +Train: [48] [1200/6250] eta: 0:12:26 lr: 0.000071 grad: 0.0847 (0.0932) loss: 0.8232 (0.8205) time: 0.1304 data: 0.0444 max mem: 9377 +Train: [48] [1300/6250] eta: 0:12:06 lr: 0.000071 grad: 0.0832 (0.0927) loss: 0.8270 (0.8205) time: 0.1462 data: 0.0576 max mem: 9377 +Train: [48] [1400/6250] eta: 0:11:46 lr: 0.000071 grad: 0.0803 (0.0922) loss: 0.8290 (0.8206) time: 0.1296 data: 0.0426 max mem: 9377 +Train: [48] [1500/6250] eta: 0:11:29 lr: 0.000071 grad: 0.0831 (0.0919) loss: 0.8264 (0.8206) time: 0.1430 data: 0.0595 max mem: 9377 +Train: [48] [1600/6250] eta: 0:11:15 lr: 0.000071 grad: 0.0868 (0.0915) loss: 0.8204 (0.8208) time: 0.1556 data: 0.0782 max mem: 9377 +Train: [48] [1700/6250] eta: 0:11:04 lr: 0.000071 grad: 0.0830 (0.0912) loss: 0.8189 (0.8208) time: 0.1506 data: 0.0723 max mem: 9377 +Train: [48] [1800/6250] eta: 0:10:53 lr: 0.000071 grad: 0.0839 (0.0910) loss: 0.8278 (0.8210) time: 0.1789 data: 0.0973 max mem: 9377 +Train: [48] [1900/6250] eta: 0:10:42 lr: 0.000071 grad: 0.0845 (0.0908) loss: 0.8264 (0.8210) time: 0.1668 data: 0.0824 max mem: 9377 +Train: [48] [2000/6250] eta: 0:10:32 lr: 0.000071 grad: 0.0847 (0.0908) loss: 0.8247 (0.8210) time: 0.1627 data: 0.0830 max mem: 9377 +Train: [48] [2100/6250] eta: 0:10:15 lr: 0.000071 grad: 0.0868 (0.0906) loss: 0.8216 (0.8209) time: 0.1394 data: 0.0575 max mem: 9377 +Train: [48] [2200/6250] eta: 0:09:59 lr: 0.000071 grad: 0.0816 (0.0904) loss: 0.8239 (0.8208) time: 0.1054 data: 0.0168 max mem: 9377 +Train: [48] [2300/6250] eta: 0:09:44 lr: 0.000071 grad: 0.0828 (0.0903) loss: 0.8208 (0.8208) time: 0.1589 data: 0.0583 max mem: 9377 +Train: [48] [2400/6250] eta: 0:09:30 lr: 0.000071 grad: 0.0889 (0.0902) loss: 0.8190 (0.8208) time: 0.1475 data: 0.0574 max mem: 9377 +Train: [48] [2500/6250] eta: 0:09:16 lr: 0.000071 grad: 0.0887 (0.0901) loss: 0.8202 (0.8209) time: 0.1490 data: 0.0668 max mem: 9377 +Train: [48] [2600/6250] eta: 0:09:02 lr: 0.000071 grad: 0.0861 (0.0900) loss: 0.8207 (0.8209) time: 0.1476 data: 0.0663 max mem: 9377 +Train: [48] [2700/6250] eta: 0:08:48 lr: 0.000071 grad: 0.0898 (0.0900) loss: 0.8257 (0.8209) time: 0.1457 data: 0.0633 max mem: 9377 +Train: [48] [2800/6250] eta: 0:08:32 lr: 0.000071 grad: 0.0824 (0.0899) loss: 0.8253 (0.8209) time: 0.1427 data: 0.0562 max mem: 9377 +Train: [48] [2900/6250] eta: 0:08:17 lr: 0.000071 grad: 0.0855 (0.0898) loss: 0.8235 (0.8210) time: 0.1616 data: 0.0810 max mem: 9377 +Train: [48] [3000/6250] eta: 0:08:00 lr: 0.000071 grad: 0.0841 (0.0898) loss: 0.8264 (0.8209) time: 0.1419 data: 0.0636 max mem: 9377 +Train: [48] [3100/6250] eta: 0:07:44 lr: 0.000071 grad: 0.0841 (0.0898) loss: 0.8235 (0.8210) time: 0.1300 data: 0.0315 max mem: 9377 +Train: [48] [3200/6250] eta: 0:07:28 lr: 0.000071 grad: 0.0839 (0.0897) loss: 0.8259 (0.8210) time: 0.1181 data: 0.0309 max mem: 9377 +Train: [48] [3300/6250] eta: 0:07:13 lr: 0.000071 grad: 0.0873 (0.0898) loss: 0.8164 (0.8210) time: 0.1327 data: 0.0518 max mem: 9377 +Train: [48] [3400/6250] eta: 0:06:58 lr: 0.000071 grad: 0.0789 (0.0897) loss: 0.8244 (0.8210) time: 0.1495 data: 0.0698 max mem: 9377 +Train: [48] [3500/6250] eta: 0:06:44 lr: 0.000071 grad: 0.0840 (0.0897) loss: 0.8198 (0.8209) time: 0.1694 data: 0.0956 max mem: 9377 +Train: [48] [3600/6250] eta: 0:06:29 lr: 0.000071 grad: 0.0891 (0.0897) loss: 0.8128 (0.8209) time: 0.1462 data: 0.0616 max mem: 9377 +Train: [48] [3700/6250] eta: 0:06:14 lr: 0.000071 grad: 0.0866 (0.0897) loss: 0.8221 (0.8209) time: 0.1359 data: 0.0529 max mem: 9377 +Train: [48] [3800/6250] eta: 0:05:58 lr: 0.000071 grad: 0.0897 (0.0898) loss: 0.8223 (0.8208) time: 0.1398 data: 0.0595 max mem: 9377 +Train: [48] [3900/6250] eta: 0:05:43 lr: 0.000070 grad: 0.0849 (0.0898) loss: 0.8255 (0.8208) time: 0.1426 data: 0.0579 max mem: 9377 +Train: [48] [4000/6250] eta: 0:05:28 lr: 0.000070 grad: 0.0872 (0.0898) loss: 0.8168 (0.8207) time: 0.1425 data: 0.0622 max mem: 9377 +Train: [48] [4100/6250] eta: 0:05:14 lr: 0.000070 grad: 0.0866 (0.0898) loss: 0.8123 (0.8207) time: 0.1436 data: 0.0553 max mem: 9377 +Train: [48] [4200/6250] eta: 0:04:59 lr: 0.000070 grad: 0.0841 (0.0898) loss: 0.8242 (0.8207) time: 0.1335 data: 0.0459 max mem: 9377 +Train: [48] [4300/6250] eta: 0:04:44 lr: 0.000070 grad: 0.0839 (0.0898) loss: 0.8173 (0.8206) time: 0.1316 data: 0.0517 max mem: 9377 +Train: [48] [4400/6250] eta: 0:04:29 lr: 0.000070 grad: 0.0846 (0.0897) loss: 0.8127 (0.8206) time: 0.1498 data: 0.0686 max mem: 9377 +Train: [48] [4500/6250] eta: 0:04:14 lr: 0.000070 grad: 0.0912 (0.0898) loss: 0.8171 (0.8205) time: 0.1170 data: 0.0228 max mem: 9377 +Train: [48] [4600/6250] eta: 0:04:00 lr: 0.000070 grad: 0.0819 (0.0898) loss: 0.8197 (0.8205) time: 0.1638 data: 0.0765 max mem: 9377 +Train: [48] [4700/6250] eta: 0:03:45 lr: 0.000070 grad: 0.0864 (0.0898) loss: 0.8183 (0.8205) time: 0.1535 data: 0.0755 max mem: 9377 +Train: [48] [4800/6250] eta: 0:03:31 lr: 0.000070 grad: 0.0914 (0.0898) loss: 0.8218 (0.8204) time: 0.1988 data: 0.1194 max mem: 9377 +Train: [48] [4900/6250] eta: 0:03:16 lr: 0.000070 grad: 0.0918 (0.0899) loss: 0.8172 (0.8204) time: 0.1342 data: 0.0511 max mem: 9377 +Train: [48] [5000/6250] eta: 0:03:02 lr: 0.000070 grad: 0.0885 (0.0900) loss: 0.8183 (0.8203) time: 0.1871 data: 0.1049 max mem: 9377 +Train: [48] [5100/6250] eta: 0:02:47 lr: 0.000070 grad: 0.0906 (0.0900) loss: 0.8172 (0.8203) time: 0.1208 data: 0.0328 max mem: 9377 +Train: [48] [5200/6250] eta: 0:02:32 lr: 0.000070 grad: 0.0939 (0.0900) loss: 0.8181 (0.8202) time: 0.1381 data: 0.0484 max mem: 9377 +Train: [48] [5300/6250] eta: 0:02:18 lr: 0.000070 grad: 0.0881 (0.0900) loss: 0.8132 (0.8202) time: 0.1458 data: 0.0658 max mem: 9377 +Train: [48] [5400/6250] eta: 0:02:03 lr: 0.000070 grad: 0.0865 (0.0900) loss: 0.8190 (0.8201) time: 0.1506 data: 0.0685 max mem: 9377 +Train: [48] [5500/6250] eta: 0:01:48 lr: 0.000070 grad: 0.0865 (0.0901) loss: 0.8137 (0.8201) time: 0.1637 data: 0.0812 max mem: 9377 +Train: [48] [5600/6250] eta: 0:01:34 lr: 0.000070 grad: 0.0918 (0.0901) loss: 0.8102 (0.8200) time: 0.1295 data: 0.0475 max mem: 9377 +Train: [48] [5700/6250] eta: 0:01:19 lr: 0.000070 grad: 0.0889 (0.0902) loss: 0.8175 (0.8200) time: 0.1361 data: 0.0485 max mem: 9377 +Train: [48] [5800/6250] eta: 0:01:05 lr: 0.000070 grad: 0.0887 (0.0902) loss: 0.8152 (0.8200) time: 0.1373 data: 0.0512 max mem: 9377 +Train: [48] [5900/6250] eta: 0:00:50 lr: 0.000070 grad: 0.0941 (0.0902) loss: 0.8131 (0.8199) time: 0.1457 data: 0.0616 max mem: 9377 +Train: [48] [6000/6250] eta: 0:00:36 lr: 0.000070 grad: 0.0831 (0.0902) loss: 0.8219 (0.8198) time: 0.1432 data: 0.0624 max mem: 9377 +Train: [48] [6100/6250] eta: 0:00:21 lr: 0.000070 grad: 0.0836 (0.0903) loss: 0.8231 (0.8198) time: 0.1375 data: 0.0548 max mem: 9377 +Train: [48] [6200/6250] eta: 0:00:07 lr: 0.000070 grad: 0.0840 (0.0902) loss: 0.8275 (0.8198) time: 0.1423 data: 0.0590 max mem: 9377 +Train: [48] [6249/6250] eta: 0:00:00 lr: 0.000070 grad: 0.0877 (0.0902) loss: 0.8228 (0.8198) time: 0.1233 data: 0.0388 max mem: 9377 +Train: [48] Total time: 0:15:11 (0.1458 s / it) +Averaged stats: lr: 0.000070 grad: 0.0877 (0.0902) loss: 0.8228 (0.8198) +Eval (hcp-train-subset): [48] [ 0/62] eta: 0:04:49 loss: 0.8332 (0.8332) time: 4.6697 data: 4.5963 max mem: 9377 +Eval (hcp-train-subset): [48] [61/62] eta: 0:00:00 loss: 0.8323 (0.8318) time: 0.1482 data: 0.1208 max mem: 9377 +Eval (hcp-train-subset): [48] Total time: 0:00:14 (0.2410 s / it) +Averaged stats (hcp-train-subset): loss: 0.8323 (0.8318) +Eval (hcp-val): [48] [ 0/62] eta: 0:04:17 loss: 0.8360 (0.8360) time: 4.1466 data: 4.0863 max mem: 9377 +Eval (hcp-val): [48] [61/62] eta: 0:00:00 loss: 0.8333 (0.8365) time: 0.1386 data: 0.1130 max mem: 9377 +Eval (hcp-val): [48] Total time: 0:00:13 (0.2193 s / it) +Averaged stats (hcp-val): loss: 0.8333 (0.8365) +Eval (nsd-val): [48] [ 0/62] eta: 0:03:45 loss: 0.7999 (0.7999) time: 3.6384 data: 3.5278 max mem: 9377 +Eval (nsd-val): [48] [61/62] eta: 0:00:00 loss: 0.8103 (0.8106) time: 0.1297 data: 0.1042 max mem: 9377 +Eval (nsd-val): [48] Total time: 0:00:14 (0.2306 s / it) +Averaged stats (nsd-val): loss: 0.8103 (0.8106) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +Train: [49] [ 0/6250] eta: 7:41:02 lr: 0.000070 grad: 0.1140 (0.1140) loss: 0.8438 (0.8438) time: 4.4259 data: 4.2531 max mem: 9377 +Train: [49] [ 100/6250] eta: 0:21:01 lr: 0.000070 grad: 0.0909 (0.1004) loss: 0.8214 (0.8335) time: 0.1598 data: 0.0706 max mem: 9377 +Train: [49] [ 200/6250] eta: 0:18:10 lr: 0.000070 grad: 0.0798 (0.0947) loss: 0.8299 (0.8287) time: 0.1397 data: 0.0420 max mem: 9377 +Train: [49] [ 300/6250] eta: 0:17:00 lr: 0.000070 grad: 0.0897 (0.0930) loss: 0.8201 (0.8269) time: 0.1346 data: 0.0404 max mem: 9377 +Train: [49] [ 400/6250] eta: 0:16:10 lr: 0.000070 grad: 0.0843 (0.0922) loss: 0.8243 (0.8254) time: 0.1466 data: 0.0616 max mem: 9377 +Train: [49] [ 500/6250] eta: 0:15:29 lr: 0.000070 grad: 0.0925 (0.0920) loss: 0.8249 (0.8241) time: 0.1556 data: 0.0675 max mem: 9377 +Train: [49] [ 600/6250] eta: 0:15:09 lr: 0.000070 grad: 0.0771 (0.0914) loss: 0.8247 (0.8236) time: 0.1741 data: 0.0904 max mem: 9377 +Train: [49] [ 700/6250] eta: 0:14:44 lr: 0.000069 grad: 0.0837 (0.0908) loss: 0.8214 (0.8235) time: 0.1598 data: 0.0718 max mem: 9377 +Train: [49] [ 800/6250] eta: 0:14:21 lr: 0.000069 grad: 0.0865 (0.0905) loss: 0.8114 (0.8230) time: 0.1560 data: 0.0705 max mem: 9377 +Train: [49] [ 900/6250] eta: 0:13:58 lr: 0.000069 grad: 0.0841 (0.0903) loss: 0.8189 (0.8227) time: 0.1369 data: 0.0423 max mem: 9377 +Train: [49] [1000/6250] eta: 0:13:38 lr: 0.000069 grad: 0.0853 (0.0899) loss: 0.8229 (0.8226) time: 0.1419 data: 0.0486 max mem: 9377 +Train: [49] [1100/6250] eta: 0:13:19 lr: 0.000069 grad: 0.0858 (0.0898) loss: 0.8203 (0.8222) time: 0.1491 data: 0.0590 max mem: 9377 +Train: [49] [1200/6250] eta: 0:12:58 lr: 0.000069 grad: 0.0854 (0.0896) loss: 0.8190 (0.8218) time: 0.1043 data: 0.0212 max mem: 9377 +Train: [49] [1300/6250] eta: 0:12:35 lr: 0.000069 grad: 0.0881 (0.0897) loss: 0.8163 (0.8215) time: 0.1498 data: 0.0634 max mem: 9377 +Train: [49] [1400/6250] eta: 0:12:20 lr: 0.000069 grad: 0.0911 (0.0899) loss: 0.8113 (0.8210) time: 0.1613 data: 0.0839 max mem: 9377 +Train: [49] [1500/6250] eta: 0:12:11 lr: 0.000069 grad: 0.0887 (0.0899) loss: 0.8140 (0.8204) time: 0.1832 data: 0.1029 max mem: 9377 +Train: [49] [1600/6250] eta: 0:12:02 lr: 0.000069 grad: 0.0892 (0.0901) loss: 0.8138 (0.8199) time: 0.1862 data: 0.1059 max mem: 9377 +Train: [49] [1700/6250] eta: 0:11:57 lr: 0.000069 grad: 0.0941 (0.0904) loss: 0.8185 (0.8195) time: 0.1839 data: 0.1048 max mem: 9377 +Train: [49] [1800/6250] eta: 0:11:45 lr: 0.000069 grad: 0.0853 (0.0905) loss: 0.8182 (0.8191) time: 0.1704 data: 0.0895 max mem: 9377 +Train: [49] [1900/6250] eta: 0:11:26 lr: 0.000069 grad: 0.0883 (0.0906) loss: 0.8179 (0.8189) time: 0.1481 data: 0.0652 max mem: 9377 +Train: [49] [2000/6250] eta: 0:11:07 lr: 0.000069 grad: 0.0875 (0.0906) loss: 0.8110 (0.8186) time: 0.1673 data: 0.0846 max mem: 9377 +Train: [49] [2100/6250] eta: 0:10:51 lr: 0.000069 grad: 0.0913 (0.0909) loss: 0.8120 (0.8183) time: 0.1768 data: 0.0919 max mem: 9377 +Train: [49] [2200/6250] eta: 0:10:39 lr: 0.000069 grad: 0.0904 (0.0911) loss: 0.8099 (0.8180) time: 0.1690 data: 0.0800 max mem: 9377 +Train: [49] [2300/6250] eta: 0:10:22 lr: 0.000069 grad: 0.0872 (0.0912) loss: 0.8143 (0.8179) time: 0.1408 data: 0.0573 max mem: 9377 +Train: [49] [2400/6250] eta: 0:10:06 lr: 0.000069 grad: 0.0904 (0.0915) loss: 0.8129 (0.8177) time: 0.1646 data: 0.0800 max mem: 9377 +Train: [49] [2500/6250] eta: 0:09:50 lr: 0.000069 grad: 0.0944 (0.0918) loss: 0.8131 (0.8175) time: 0.1520 data: 0.0593 max mem: 9377 +Train: [49] [2600/6250] eta: 0:09:33 lr: 0.000069 grad: 0.0989 (0.0919) loss: 0.8064 (0.8172) time: 0.1530 data: 0.0729 max mem: 9377 +Train: [49] [2700/6250] eta: 0:09:15 lr: 0.000069 grad: 0.0958 (0.0920) loss: 0.8090 (0.8171) time: 0.1371 data: 0.0540 max mem: 9377 +Train: [49] [2800/6250] eta: 0:08:57 lr: 0.000069 grad: 0.0965 (0.0922) loss: 0.8118 (0.8169) time: 0.1362 data: 0.0513 max mem: 9377 +Train: [49] [2900/6250] eta: 0:08:39 lr: 0.000069 grad: 0.0889 (0.0924) loss: 0.8153 (0.8167) time: 0.1469 data: 0.0599 max mem: 9377 +Train: [49] [3000/6250] eta: 0:08:21 lr: 0.000069 grad: 0.0857 (0.0925) loss: 0.8147 (0.8167) time: 0.1297 data: 0.0386 max mem: 9377 +Train: [49] [3100/6250] eta: 0:08:05 lr: 0.000069 grad: 0.0919 (0.0925) loss: 0.8144 (0.8165) time: 0.1323 data: 0.0504 max mem: 9377 +Train: [49] [3200/6250] eta: 0:07:49 lr: 0.000069 grad: 0.0960 (0.0927) loss: 0.8179 (0.8165) time: 0.1593 data: 0.0720 max mem: 9377 +Train: [49] [3300/6250] eta: 0:07:32 lr: 0.000069 grad: 0.0905 (0.0928) loss: 0.8138 (0.8164) time: 0.1394 data: 0.0543 max mem: 9377 +Train: [49] [3400/6250] eta: 0:07:15 lr: 0.000069 grad: 0.0847 (0.0927) loss: 0.8214 (0.8164) time: 0.1542 data: 0.0762 max mem: 9377 +Train: [49] [3500/6250] eta: 0:07:00 lr: 0.000069 grad: 0.0870 (0.0928) loss: 0.8242 (0.8165) time: 0.1450 data: 0.0638 max mem: 9377 +Train: [49] [3600/6250] eta: 0:06:43 lr: 0.000069 grad: 0.0880 (0.0928) loss: 0.8161 (0.8164) time: 0.1243 data: 0.0372 max mem: 9377 +Train: [49] [3700/6250] eta: 0:06:27 lr: 0.000069 grad: 0.0891 (0.0929) loss: 0.8185 (0.8164) time: 0.1287 data: 0.0454 max mem: 9377 +Train: [49] [3800/6250] eta: 0:06:12 lr: 0.000068 grad: 0.0993 (0.0929) loss: 0.8122 (0.8164) time: 0.1454 data: 0.0626 max mem: 9377 +Train: [49] [3900/6250] eta: 0:05:56 lr: 0.000068 grad: 0.0996 (0.0930) loss: 0.8205 (0.8164) time: 0.1458 data: 0.0589 max mem: 9377 +Train: [49] [4000/6250] eta: 0:05:42 lr: 0.000068 grad: 0.0923 (0.0931) loss: 0.8177 (0.8164) time: 0.1798 data: 0.1010 max mem: 9377 +Train: [49] [4100/6250] eta: 0:05:26 lr: 0.000068 grad: 0.0900 (0.0931) loss: 0.8224 (0.8164) time: 0.1476 data: 0.0666 max mem: 9377 +Train: [49] [4200/6250] eta: 0:05:11 lr: 0.000068 grad: 0.0850 (0.0931) loss: 0.8224 (0.8164) time: 0.1656 data: 0.0818 max mem: 9377 +Train: [49] [4300/6250] eta: 0:04:56 lr: 0.000068 grad: 0.0912 (0.0930) loss: 0.8180 (0.8165) time: 0.1568 data: 0.0760 max mem: 9377 +Train: [49] [4400/6250] eta: 0:04:41 lr: 0.000068 grad: 0.0897 (0.0930) loss: 0.8201 (0.8165) time: 0.1698 data: 0.0886 max mem: 9377 +Train: [49] [4500/6250] eta: 0:04:26 lr: 0.000068 grad: 0.0876 (0.0929) loss: 0.8252 (0.8166) time: 0.1545 data: 0.0735 max mem: 9377 +Train: [49] [4600/6250] eta: 0:04:10 lr: 0.000068 grad: 0.0943 (0.0928) loss: 0.8197 (0.8167) time: 0.1338 data: 0.0486 max mem: 9377 +Train: [49] [4700/6250] eta: 0:03:55 lr: 0.000068 grad: 0.0941 (0.0929) loss: 0.8170 (0.8167) time: 0.1565 data: 0.0790 max mem: 9377 +Train: [49] [4800/6250] eta: 0:03:39 lr: 0.000068 grad: 0.0879 (0.0929) loss: 0.8184 (0.8167) time: 0.1505 data: 0.0698 max mem: 9377 +Train: [49] [4900/6250] eta: 0:03:24 lr: 0.000068 grad: 0.0859 (0.0929) loss: 0.8235 (0.8168) time: 0.1326 data: 0.0488 max mem: 9377 +Train: [49] [5000/6250] eta: 0:03:09 lr: 0.000068 grad: 0.0926 (0.0930) loss: 0.8214 (0.8167) time: 0.1257 data: 0.0400 max mem: 9377 +Train: [49] [5100/6250] eta: 0:02:53 lr: 0.000068 grad: 0.0892 (0.0930) loss: 0.8220 (0.8167) time: 0.1275 data: 0.0498 max mem: 9377 +Train: [49] [5200/6250] eta: 0:02:38 lr: 0.000068 grad: 0.0942 (0.0930) loss: 0.8196 (0.8168) time: 0.1402 data: 0.0580 max mem: 9377 +Train: [49] [5300/6250] eta: 0:02:23 lr: 0.000068 grad: 0.0892 (0.0932) loss: 0.8192 (0.8168) time: 0.1619 data: 0.0813 max mem: 9377 +Train: [49] [5400/6250] eta: 0:02:08 lr: 0.000068 grad: 0.0937 (0.0932) loss: 0.8144 (0.8167) time: 0.1538 data: 0.0678 max mem: 9377 +Train: [49] [5500/6250] eta: 0:01:53 lr: 0.000068 grad: 0.0887 (0.0932) loss: 0.8165 (0.8168) time: 0.1684 data: 0.0917 max mem: 9377 +Train: [49] [5600/6250] eta: 0:01:38 lr: 0.000068 grad: 0.0928 (0.0932) loss: 0.8213 (0.8168) time: 0.1672 data: 0.0837 max mem: 9377 +Train: [49] [5700/6250] eta: 0:01:23 lr: 0.000068 grad: 0.1004 (0.0933) loss: 0.8113 (0.8168) time: 0.1593 data: 0.0783 max mem: 9377 +Train: [49] [5800/6250] eta: 0:01:07 lr: 0.000068 grad: 0.0932 (0.0933) loss: 0.8134 (0.8168) time: 0.1270 data: 0.0448 max mem: 9377 +Train: [49] [5900/6250] eta: 0:00:52 lr: 0.000068 grad: 0.0908 (0.0933) loss: 0.8082 (0.8168) time: 0.1628 data: 0.0740 max mem: 9377 +Train: [49] [6000/6250] eta: 0:00:37 lr: 0.000068 grad: 0.0946 (0.0933) loss: 0.8133 (0.8168) time: 0.1277 data: 0.0463 max mem: 9377 +Train: [49] [6100/6250] eta: 0:00:22 lr: 0.000068 grad: 0.0904 (0.0933) loss: 0.8176 (0.8168) time: 0.1392 data: 0.0613 max mem: 9377 +Train: [49] [6200/6250] eta: 0:00:07 lr: 0.000068 grad: 0.0879 (0.0934) loss: 0.8147 (0.8168) time: 0.1468 data: 0.0644 max mem: 9377 +Train: [49] [6249/6250] eta: 0:00:00 lr: 0.000068 grad: 0.0869 (0.0934) loss: 0.8155 (0.8168) time: 0.1169 data: 0.0387 max mem: 9377 +Train: [49] Total time: 0:15:50 (0.1520 s / it) +Averaged stats: lr: 0.000068 grad: 0.0869 (0.0934) loss: 0.8155 (0.8168) +Eval (hcp-train-subset): [49] [ 0/62] eta: 0:05:41 loss: 0.8323 (0.8323) time: 5.5092 data: 5.4734 max mem: 9377 +Eval (hcp-train-subset): [49] [61/62] eta: 0:00:00 loss: 0.8326 (0.8313) time: 0.1398 data: 0.1126 max mem: 9377 +Eval (hcp-train-subset): [49] Total time: 0:00:15 (0.2577 s / it) +Averaged stats (hcp-train-subset): loss: 0.8326 (0.8313) +Making plots (hcp-train-subset): example=50 +Eval (hcp-val): [49] [ 0/62] eta: 0:05:22 loss: 0.8334 (0.8334) time: 5.2064 data: 5.1757 max mem: 9377 +Eval (hcp-val): [49] [61/62] eta: 0:00:00 loss: 0.8342 (0.8355) time: 0.1398 data: 0.1147 max mem: 9377 +Eval (hcp-val): [49] Total time: 0:00:14 (0.2355 s / it) +Averaged stats (hcp-val): loss: 0.8342 (0.8355) +Making plots (hcp-val): example=23 +Eval (nsd-val): [49] [ 0/62] eta: 0:04:21 loss: 0.7995 (0.7995) time: 4.2192 data: 4.1469 max mem: 9377 +Eval (nsd-val): [49] [61/62] eta: 0:00:00 loss: 0.8085 (0.8106) time: 0.1151 data: 0.0900 max mem: 9377 +Eval (nsd-val): [49] Total time: 0:00:15 (0.2443 s / it) +Averaged stats (nsd-val): loss: 0.8085 (0.8106) +Making plots (nsd-val): example=14 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-00049.pth +Train: [50] [ 0/6250] eta: 10:25:28 lr: 0.000068 grad: 0.2418 (0.2418) loss: 0.8525 (0.8525) time: 6.0046 data: 5.8539 max mem: 9377 +Train: [50] [ 100/6250] eta: 0:23:21 lr: 0.000068 grad: 0.0875 (0.1085) loss: 0.8289 (0.8232) time: 0.1890 data: 0.0862 max mem: 9377 +Train: [50] [ 200/6250] eta: 0:19:25 lr: 0.000068 grad: 0.0854 (0.1004) loss: 0.8283 (0.8247) time: 0.1392 data: 0.0509 max mem: 9377 +Train: [50] [ 300/6250] eta: 0:18:01 lr: 0.000068 grad: 0.0817 (0.0992) loss: 0.8195 (0.8239) time: 0.1677 data: 0.0801 max mem: 9377 +Train: [50] [ 400/6250] eta: 0:16:58 lr: 0.000068 grad: 0.0857 (0.0963) loss: 0.8248 (0.8239) time: 0.1341 data: 0.0461 max mem: 9377 +Train: [50] [ 500/6250] eta: 0:16:16 lr: 0.000067 grad: 0.0832 (0.0947) loss: 0.8262 (0.8241) time: 0.1549 data: 0.0609 max mem: 9377 +Train: [50] [ 600/6250] eta: 0:15:47 lr: 0.000067 grad: 0.0869 (0.0941) loss: 0.8149 (0.8232) time: 0.1601 data: 0.0796 max mem: 9377 +Train: [50] [ 700/6250] eta: 0:15:15 lr: 0.000067 grad: 0.0873 (0.0933) loss: 0.8186 (0.8225) time: 0.1479 data: 0.0509 max mem: 9377 +Train: [50] [ 800/6250] eta: 0:14:46 lr: 0.000067 grad: 0.0833 (0.0927) loss: 0.8139 (0.8220) time: 0.1398 data: 0.0431 max mem: 9377 +Train: [50] [ 900/6250] eta: 0:14:25 lr: 0.000067 grad: 0.0864 (0.0923) loss: 0.8164 (0.8217) time: 0.1495 data: 0.0568 max mem: 9377 +Train: [50] [1000/6250] eta: 0:13:54 lr: 0.000067 grad: 0.0840 (0.0919) loss: 0.8244 (0.8214) time: 0.1214 data: 0.0379 max mem: 9377 +Train: [50] [1100/6250] eta: 0:13:33 lr: 0.000067 grad: 0.0863 (0.0919) loss: 0.8172 (0.8210) time: 0.1650 data: 0.0822 max mem: 9377 +Train: [50] [1200/6250] eta: 0:13:09 lr: 0.000067 grad: 0.0930 (0.0920) loss: 0.8157 (0.8206) time: 0.1307 data: 0.0473 max mem: 9377 +Train: [50] [1300/6250] eta: 0:12:51 lr: 0.000067 grad: 0.0877 (0.0921) loss: 0.8149 (0.8203) time: 0.1386 data: 0.0539 max mem: 9377 +Train: [50] [1400/6250] eta: 0:12:32 lr: 0.000067 grad: 0.0878 (0.0920) loss: 0.8228 (0.8201) time: 0.1380 data: 0.0562 max mem: 9377 +Train: [50] [1500/6250] eta: 0:12:13 lr: 0.000067 grad: 0.0840 (0.0918) loss: 0.8169 (0.8201) time: 0.1470 data: 0.0644 max mem: 9377 +Train: [50] [1600/6250] eta: 0:11:56 lr: 0.000067 grad: 0.0898 (0.0918) loss: 0.8225 (0.8199) time: 0.1493 data: 0.0661 max mem: 9377 +Train: [50] [1700/6250] eta: 0:11:38 lr: 0.000067 grad: 0.0886 (0.0917) loss: 0.8186 (0.8199) time: 0.1741 data: 0.0934 max mem: 9377 +Train: [50] [1800/6250] eta: 0:11:25 lr: 0.000067 grad: 0.0836 (0.0917) loss: 0.8210 (0.8197) time: 0.1448 data: 0.0702 max mem: 9377 +Train: [50] [1900/6250] eta: 0:11:09 lr: 0.000067 grad: 0.0932 (0.0916) loss: 0.8139 (0.8195) time: 0.1479 data: 0.0720 max mem: 9377 +Train: [50] [2000/6250] eta: 0:10:51 lr: 0.000067 grad: 0.0909 (0.0916) loss: 0.8221 (0.8196) time: 0.1430 data: 0.0683 max mem: 9377 +Train: [50] [2100/6250] eta: 0:10:35 lr: 0.000067 grad: 0.0895 (0.0916) loss: 0.8121 (0.8195) time: 0.1502 data: 0.0634 max mem: 9377 +Train: [50] [2200/6250] eta: 0:10:18 lr: 0.000067 grad: 0.0899 (0.0916) loss: 0.8202 (0.8194) time: 0.1399 data: 0.0594 max mem: 9377 +Train: [50] [2300/6250] eta: 0:10:00 lr: 0.000067 grad: 0.0843 (0.0915) loss: 0.8216 (0.8193) time: 0.1107 data: 0.0237 max mem: 9377 +Train: [50] [2400/6250] eta: 0:09:43 lr: 0.000067 grad: 0.0825 (0.0914) loss: 0.8227 (0.8193) time: 0.1347 data: 0.0488 max mem: 9377 +Train: [50] [2500/6250] eta: 0:09:25 lr: 0.000067 grad: 0.0827 (0.0913) loss: 0.8219 (0.8194) time: 0.1326 data: 0.0467 max mem: 9377 +Train: [50] [2600/6250] eta: 0:09:07 lr: 0.000067 grad: 0.0820 (0.0911) loss: 0.8288 (0.8195) time: 0.1275 data: 0.0471 max mem: 9377 +Train: [50] [2700/6250] eta: 0:08:49 lr: 0.000067 grad: 0.0825 (0.0910) loss: 0.8176 (0.8196) time: 0.1413 data: 0.0613 max mem: 9377 +Train: [50] [2800/6250] eta: 0:08:34 lr: 0.000067 grad: 0.0882 (0.0909) loss: 0.8198 (0.8197) time: 0.1499 data: 0.0734 max mem: 9377 +Train: [50] [2900/6250] eta: 0:08:20 lr: 0.000067 grad: 0.0911 (0.0909) loss: 0.8151 (0.8197) time: 0.1629 data: 0.0835 max mem: 9377 +Train: [50] [3000/6250] eta: 0:08:07 lr: 0.000067 grad: 0.0919 (0.0909) loss: 0.8238 (0.8198) time: 0.1666 data: 0.0888 max mem: 9377 +Train: [50] [3100/6250] eta: 0:07:53 lr: 0.000067 grad: 0.0853 (0.0909) loss: 0.8216 (0.8199) time: 0.1445 data: 0.0675 max mem: 9377 +Train: [50] [3200/6250] eta: 0:07:39 lr: 0.000067 grad: 0.0912 (0.0909) loss: 0.8192 (0.8199) time: 0.1556 data: 0.0767 max mem: 9377 +Train: [50] [3300/6250] eta: 0:07:24 lr: 0.000067 grad: 0.0873 (0.0909) loss: 0.8209 (0.8200) time: 0.1589 data: 0.0769 max mem: 9377 +Train: [50] [3400/6250] eta: 0:07:09 lr: 0.000067 grad: 0.0901 (0.0910) loss: 0.8195 (0.8199) time: 0.1382 data: 0.0554 max mem: 9377 +Train: [50] [3500/6250] eta: 0:06:53 lr: 0.000067 grad: 0.0909 (0.0909) loss: 0.8226 (0.8200) time: 0.1346 data: 0.0492 max mem: 9377 +Train: [50] [3600/6250] eta: 0:06:37 lr: 0.000066 grad: 0.0902 (0.0910) loss: 0.8242 (0.8200) time: 0.1237 data: 0.0352 max mem: 9377 +Train: [50] [3700/6250] eta: 0:06:22 lr: 0.000066 grad: 0.0860 (0.0909) loss: 0.8232 (0.8201) time: 0.1435 data: 0.0651 max mem: 9377 +Train: [50] [3800/6250] eta: 0:06:06 lr: 0.000066 grad: 0.0882 (0.0909) loss: 0.8144 (0.8200) time: 0.1223 data: 0.0379 max mem: 9377 +Train: [50] [3900/6250] eta: 0:05:51 lr: 0.000066 grad: 0.0903 (0.0908) loss: 0.8164 (0.8200) time: 0.1325 data: 0.0447 max mem: 9377 +Train: [50] [4000/6250] eta: 0:05:37 lr: 0.000066 grad: 0.0862 (0.0909) loss: 0.8162 (0.8199) time: 0.1971 data: 0.0454 max mem: 9377 +Train: [50] [4100/6250] eta: 0:05:21 lr: 0.000066 grad: 0.0870 (0.0909) loss: 0.8168 (0.8199) time: 0.1291 data: 0.0403 max mem: 9377 +Train: [50] [4200/6250] eta: 0:05:05 lr: 0.000066 grad: 0.0971 (0.0909) loss: 0.8129 (0.8199) time: 0.1283 data: 0.0494 max mem: 9377 +Train: [50] [4300/6250] eta: 0:04:50 lr: 0.000066 grad: 0.0934 (0.0910) loss: 0.8189 (0.8198) time: 0.1366 data: 0.0611 max mem: 9377 +Train: [50] [4400/6250] eta: 0:04:35 lr: 0.000066 grad: 0.0889 (0.0910) loss: 0.8189 (0.8198) time: 0.1438 data: 0.0642 max mem: 9377 +Train: [50] [4500/6250] eta: 0:04:20 lr: 0.000066 grad: 0.0939 (0.0911) loss: 0.8147 (0.8197) time: 0.1415 data: 0.0613 max mem: 9377 +Train: [50] [4600/6250] eta: 0:04:05 lr: 0.000066 grad: 0.0930 (0.0912) loss: 0.8136 (0.8197) time: 0.1515 data: 0.0662 max mem: 9377 +Train: [50] [4700/6250] eta: 0:03:50 lr: 0.000066 grad: 0.0949 (0.0913) loss: 0.8161 (0.8197) time: 0.1249 data: 0.0371 max mem: 9377 +Train: [50] [4800/6250] eta: 0:03:35 lr: 0.000066 grad: 0.0927 (0.0914) loss: 0.8166 (0.8196) time: 0.1679 data: 0.0895 max mem: 9377 +Train: [50] [4900/6250] eta: 0:03:20 lr: 0.000066 grad: 0.0904 (0.0914) loss: 0.8148 (0.8195) time: 0.1333 data: 0.0517 max mem: 9377 +Train: [50] [5000/6250] eta: 0:03:05 lr: 0.000066 grad: 0.0929 (0.0915) loss: 0.8158 (0.8195) time: 0.1556 data: 0.0753 max mem: 9377 +Train: [50] [5100/6250] eta: 0:02:50 lr: 0.000066 grad: 0.0908 (0.0915) loss: 0.8214 (0.8195) time: 0.1343 data: 0.0519 max mem: 9377 +Train: [50] [5200/6250] eta: 0:02:35 lr: 0.000066 grad: 0.0929 (0.0917) loss: 0.8143 (0.8194) time: 0.1479 data: 0.0679 max mem: 9377 +Train: [50] [5300/6250] eta: 0:02:20 lr: 0.000066 grad: 0.0979 (0.0918) loss: 0.8210 (0.8193) time: 0.1445 data: 0.0581 max mem: 9377 +Train: [50] [5400/6250] eta: 0:02:05 lr: 0.000066 grad: 0.0928 (0.0918) loss: 0.8190 (0.8193) time: 0.1419 data: 0.0540 max mem: 9377 +Train: [50] [5500/6250] eta: 0:01:50 lr: 0.000066 grad: 0.0975 (0.0920) loss: 0.8102 (0.8192) time: 0.1252 data: 0.0397 max mem: 9377 +Train: [50] [5600/6250] eta: 0:01:36 lr: 0.000066 grad: 0.0902 (0.0921) loss: 0.8238 (0.8191) time: 0.1275 data: 0.0422 max mem: 9377 +Train: [50] [5700/6250] eta: 0:01:21 lr: 0.000066 grad: 0.0896 (0.0922) loss: 0.8114 (0.8190) time: 0.1253 data: 0.0398 max mem: 9377 +Train: [50] [5800/6250] eta: 0:01:06 lr: 0.000066 grad: 0.0987 (0.0923) loss: 0.8209 (0.8190) time: 0.1355 data: 0.0534 max mem: 9377 +Train: [50] [5900/6250] eta: 0:00:51 lr: 0.000066 grad: 0.0951 (0.0924) loss: 0.8225 (0.8189) time: 0.1810 data: 0.0993 max mem: 9377 +Train: [50] [6000/6250] eta: 0:00:36 lr: 0.000066 grad: 0.0911 (0.0924) loss: 0.8182 (0.8189) time: 0.1554 data: 0.0709 max mem: 9377 +Train: [50] [6100/6250] eta: 0:00:22 lr: 0.000066 grad: 0.0928 (0.0925) loss: 0.8244 (0.8188) time: 0.1755 data: 0.0952 max mem: 9377 +Train: [50] [6200/6250] eta: 0:00:07 lr: 0.000066 grad: 0.1065 (0.0926) loss: 0.8150 (0.8188) time: 0.1675 data: 0.0865 max mem: 9377 +Train: [50] [6249/6250] eta: 0:00:00 lr: 0.000066 grad: 0.0878 (0.0926) loss: 0.8193 (0.8188) time: 0.1541 data: 0.0739 max mem: 9377 +Train: [50] Total time: 0:15:25 (0.1481 s / it) +Averaged stats: lr: 0.000066 grad: 0.0878 (0.0926) loss: 0.8193 (0.8188) +Eval (hcp-train-subset): [50] [ 0/62] eta: 0:04:01 loss: 0.8335 (0.8335) time: 3.8890 data: 3.7915 max mem: 9377 +Eval (hcp-train-subset): [50] [61/62] eta: 0:00:00 loss: 0.8305 (0.8323) time: 0.1344 data: 0.1089 max mem: 9377 +Eval (hcp-train-subset): [50] Total time: 0:00:14 (0.2385 s / it) +Averaged stats (hcp-train-subset): loss: 0.8305 (0.8323) +Eval (hcp-val): [50] [ 0/62] eta: 0:04:28 loss: 0.8296 (0.8296) time: 4.3226 data: 4.2445 max mem: 9377 +Eval (hcp-val): [50] [61/62] eta: 0:00:00 loss: 0.8350 (0.8364) time: 0.1293 data: 0.1017 max mem: 9377 +Eval (hcp-val): [50] Total time: 0:00:13 (0.2157 s / it) +Averaged stats (hcp-val): loss: 0.8350 (0.8364) +Eval (nsd-val): [50] [ 0/62] eta: 0:04:28 loss: 0.7989 (0.7989) time: 4.3249 data: 4.2481 max mem: 9377 +Eval (nsd-val): [50] [61/62] eta: 0:00:00 loss: 0.8100 (0.8111) time: 0.1515 data: 0.1235 max mem: 9377 +Eval (nsd-val): [50] Total time: 0:00:13 (0.2255 s / it) +Averaged stats (nsd-val): loss: 0.8100 (0.8111) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +Train: [51] [ 0/6250] eta: 8:20:50 lr: 0.000066 grad: 0.0554 (0.0554) loss: 0.8544 (0.8544) time: 4.8080 data: 4.5806 max mem: 9377 +Train: [51] [ 100/6250] eta: 0:21:06 lr: 0.000066 grad: 0.1069 (0.1146) loss: 0.8185 (0.8278) time: 0.1450 data: 0.0421 max mem: 9377 +Train: [51] [ 200/6250] eta: 0:18:42 lr: 0.000066 grad: 0.0938 (0.1078) loss: 0.8281 (0.8249) time: 0.1596 data: 0.0689 max mem: 9377 +Train: [51] [ 300/6250] eta: 0:17:46 lr: 0.000065 grad: 0.1027 (0.1061) loss: 0.8156 (0.8234) time: 0.1545 data: 0.0738 max mem: 9377 +Train: [51] [ 400/6250] eta: 0:17:16 lr: 0.000065 grad: 0.0905 (0.1031) loss: 0.8219 (0.8230) time: 0.1636 data: 0.0665 max mem: 9377 +Train: [51] [ 500/6250] eta: 0:16:52 lr: 0.000065 grad: 0.0966 (0.1013) loss: 0.8177 (0.8228) time: 0.2117 data: 0.1208 max mem: 9377 +Train: [51] [ 600/6250] eta: 0:16:25 lr: 0.000065 grad: 0.0899 (0.1002) loss: 0.8228 (0.8218) time: 0.1846 data: 0.0979 max mem: 9377 +Train: [51] [ 700/6250] eta: 0:15:49 lr: 0.000065 grad: 0.0926 (0.0991) loss: 0.8176 (0.8213) time: 0.1508 data: 0.0629 max mem: 9377 +Train: [51] [ 800/6250] eta: 0:15:15 lr: 0.000065 grad: 0.0826 (0.0978) loss: 0.8192 (0.8211) time: 0.1460 data: 0.0627 max mem: 9377 +Train: [51] [ 900/6250] eta: 0:14:50 lr: 0.000065 grad: 0.0821 (0.0972) loss: 0.8208 (0.8210) time: 0.1533 data: 0.0683 max mem: 9377 +Train: [51] [1000/6250] eta: 0:14:29 lr: 0.000065 grad: 0.0946 (0.0967) loss: 0.8174 (0.8208) time: 0.1816 data: 0.0982 max mem: 9377 +Train: [51] [1100/6250] eta: 0:14:03 lr: 0.000065 grad: 0.0878 (0.0959) loss: 0.8163 (0.8206) time: 0.1573 data: 0.0724 max mem: 9377 +Train: [51] [1200/6250] eta: 0:13:40 lr: 0.000065 grad: 0.0862 (0.0952) loss: 0.8174 (0.8204) time: 0.1512 data: 0.0653 max mem: 9377 +Train: [51] [1300/6250] eta: 0:13:21 lr: 0.000065 grad: 0.0835 (0.0951) loss: 0.8258 (0.8205) time: 0.1430 data: 0.0583 max mem: 9377 +Train: [51] [1400/6250] eta: 0:13:00 lr: 0.000065 grad: 0.0903 (0.0948) loss: 0.8134 (0.8203) time: 0.1396 data: 0.0520 max mem: 9377 +Train: [51] [1500/6250] eta: 0:12:38 lr: 0.000065 grad: 0.0855 (0.0947) loss: 0.8263 (0.8204) time: 0.1474 data: 0.0701 max mem: 9377 +Train: [51] [1600/6250] eta: 0:12:25 lr: 0.000065 grad: 0.0860 (0.0945) loss: 0.8261 (0.8205) time: 0.1873 data: 0.1079 max mem: 9377 +Train: [51] [1700/6250] eta: 0:12:10 lr: 0.000065 grad: 0.0958 (0.0944) loss: 0.8150 (0.8204) time: 0.1578 data: 0.0741 max mem: 9377 +Train: [51] [1800/6250] eta: 0:11:54 lr: 0.000065 grad: 0.0841 (0.0942) loss: 0.8211 (0.8203) time: 0.1752 data: 0.0951 max mem: 9377 +Train: [51] [1900/6250] eta: 0:11:37 lr: 0.000065 grad: 0.0907 (0.0942) loss: 0.8190 (0.8203) time: 0.1503 data: 0.0707 max mem: 9377 +Train: [51] [2000/6250] eta: 0:11:19 lr: 0.000065 grad: 0.0886 (0.0943) loss: 0.8172 (0.8201) time: 0.1442 data: 0.0598 max mem: 9377 +Train: [51] [2100/6250] eta: 0:11:01 lr: 0.000065 grad: 0.0893 (0.0942) loss: 0.8182 (0.8200) time: 0.1504 data: 0.0576 max mem: 9377 +Train: [51] [2200/6250] eta: 0:10:43 lr: 0.000065 grad: 0.0938 (0.0942) loss: 0.8193 (0.8199) time: 0.1421 data: 0.0551 max mem: 9377 +Train: [51] [2300/6250] eta: 0:10:26 lr: 0.000065 grad: 0.0919 (0.0941) loss: 0.8109 (0.8197) time: 0.1558 data: 0.0711 max mem: 9377 +Train: [51] [2400/6250] eta: 0:10:08 lr: 0.000065 grad: 0.0950 (0.0942) loss: 0.8153 (0.8195) time: 0.1566 data: 0.0745 max mem: 9377 +Train: [51] [2500/6250] eta: 0:09:49 lr: 0.000065 grad: 0.0917 (0.0941) loss: 0.8206 (0.8195) time: 0.1518 data: 0.0665 max mem: 9377 +Train: [51] [2600/6250] eta: 0:09:30 lr: 0.000065 grad: 0.0935 (0.0942) loss: 0.8177 (0.8194) time: 0.1176 data: 0.0265 max mem: 9377 +Train: [51] [2700/6250] eta: 0:09:11 lr: 0.000065 grad: 0.0904 (0.0943) loss: 0.8199 (0.8193) time: 0.1224 data: 0.0315 max mem: 9377 +Train: [51] [2800/6250] eta: 0:08:54 lr: 0.000065 grad: 0.0888 (0.0944) loss: 0.8200 (0.8191) time: 0.1436 data: 0.0582 max mem: 9377 +Train: [51] [2900/6250] eta: 0:08:37 lr: 0.000065 grad: 0.0946 (0.0946) loss: 0.8179 (0.8190) time: 0.1434 data: 0.0607 max mem: 9377 +Train: [51] [3000/6250] eta: 0:08:23 lr: 0.000065 grad: 0.0930 (0.0947) loss: 0.8185 (0.8188) time: 0.1541 data: 0.0782 max mem: 9377 +Train: [51] [3100/6250] eta: 0:08:06 lr: 0.000065 grad: 0.0879 (0.0947) loss: 0.8212 (0.8187) time: 0.1422 data: 0.0566 max mem: 9377 +Train: [51] [3200/6250] eta: 0:07:51 lr: 0.000065 grad: 0.0927 (0.0948) loss: 0.8131 (0.8187) time: 0.1249 data: 0.0466 max mem: 9377 +Train: [51] [3300/6250] eta: 0:07:35 lr: 0.000065 grad: 0.0972 (0.0948) loss: 0.8090 (0.8186) time: 0.1646 data: 0.0871 max mem: 9377 +Train: [51] [3400/6250] eta: 0:07:18 lr: 0.000064 grad: 0.0925 (0.0949) loss: 0.8184 (0.8186) time: 0.1311 data: 0.0407 max mem: 9377 +Train: [51] [3500/6250] eta: 0:07:02 lr: 0.000064 grad: 0.0919 (0.0949) loss: 0.8172 (0.8186) time: 0.1447 data: 0.0542 max mem: 9377 +Train: [51] [3600/6250] eta: 0:06:46 lr: 0.000064 grad: 0.0901 (0.0949) loss: 0.8223 (0.8186) time: 0.1422 data: 0.0502 max mem: 9377 +Train: [51] [3700/6250] eta: 0:06:29 lr: 0.000064 grad: 0.0887 (0.0948) loss: 0.8180 (0.8186) time: 0.1327 data: 0.0506 max mem: 9377 +Train: [51] [3800/6250] eta: 0:06:14 lr: 0.000064 grad: 0.0906 (0.0948) loss: 0.8252 (0.8186) time: 0.1561 data: 0.0740 max mem: 9377 +Train: [51] [3900/6250] eta: 0:05:59 lr: 0.000064 grad: 0.0862 (0.0948) loss: 0.8219 (0.8186) time: 0.1479 data: 0.0659 max mem: 9377 +Train: [51] [4000/6250] eta: 0:05:43 lr: 0.000064 grad: 0.0930 (0.0949) loss: 0.8184 (0.8186) time: 0.1366 data: 0.0522 max mem: 9377 +Train: [51] [4100/6250] eta: 0:05:27 lr: 0.000064 grad: 0.0904 (0.0949) loss: 0.8175 (0.8186) time: 0.1615 data: 0.0784 max mem: 9377 +Train: [51] [4200/6250] eta: 0:05:12 lr: 0.000064 grad: 0.0905 (0.0948) loss: 0.8167 (0.8186) time: 0.1462 data: 0.0641 max mem: 9377 +Train: [51] [4300/6250] eta: 0:04:56 lr: 0.000064 grad: 0.0885 (0.0948) loss: 0.8187 (0.8186) time: 0.1512 data: 0.0648 max mem: 9377 +Train: [51] [4400/6250] eta: 0:04:41 lr: 0.000064 grad: 0.0949 (0.0948) loss: 0.8149 (0.8185) time: 0.1527 data: 0.0727 max mem: 9377 +Train: [51] [4500/6250] eta: 0:04:25 lr: 0.000064 grad: 0.0933 (0.0948) loss: 0.8154 (0.8184) time: 0.1633 data: 0.0810 max mem: 9377 +Train: [51] [4600/6250] eta: 0:04:10 lr: 0.000064 grad: 0.0930 (0.0948) loss: 0.8155 (0.8183) time: 0.1514 data: 0.0703 max mem: 9377 +Train: [51] [4700/6250] eta: 0:03:55 lr: 0.000064 grad: 0.0914 (0.0948) loss: 0.8115 (0.8183) time: 0.1573 data: 0.0779 max mem: 9377 +Train: [51] [4800/6250] eta: 0:03:39 lr: 0.000064 grad: 0.0908 (0.0948) loss: 0.8204 (0.8182) time: 0.1459 data: 0.0618 max mem: 9377 +Train: [51] [4900/6250] eta: 0:03:24 lr: 0.000064 grad: 0.0916 (0.0948) loss: 0.8155 (0.8181) time: 0.1562 data: 0.0728 max mem: 9377 +Train: [51] [5000/6250] eta: 0:03:09 lr: 0.000064 grad: 0.0898 (0.0948) loss: 0.8206 (0.8181) time: 0.1517 data: 0.0623 max mem: 9377 +Train: [51] [5100/6250] eta: 0:02:53 lr: 0.000064 grad: 0.0896 (0.0948) loss: 0.8186 (0.8180) time: 0.1468 data: 0.0673 max mem: 9377 +Train: [51] [5200/6250] eta: 0:02:38 lr: 0.000064 grad: 0.0914 (0.0948) loss: 0.8131 (0.8180) time: 0.1490 data: 0.0677 max mem: 9377 +Train: [51] [5300/6250] eta: 0:02:23 lr: 0.000064 grad: 0.0890 (0.0949) loss: 0.8223 (0.8179) time: 0.1753 data: 0.0961 max mem: 9377 +Train: [51] [5400/6250] eta: 0:02:08 lr: 0.000064 grad: 0.0937 (0.0949) loss: 0.8135 (0.8178) time: 0.1676 data: 0.0855 max mem: 9377 +Train: [51] [5500/6250] eta: 0:01:53 lr: 0.000064 grad: 0.0914 (0.0949) loss: 0.8118 (0.8177) time: 0.1434 data: 0.0613 max mem: 9377 +Train: [51] [5600/6250] eta: 0:01:38 lr: 0.000064 grad: 0.0908 (0.0949) loss: 0.8053 (0.8176) time: 0.1588 data: 0.0750 max mem: 9377 +Train: [51] [5700/6250] eta: 0:01:22 lr: 0.000064 grad: 0.0876 (0.0949) loss: 0.8183 (0.8176) time: 0.1549 data: 0.0777 max mem: 9377 +Train: [51] [5800/6250] eta: 0:01:07 lr: 0.000064 grad: 0.0900 (0.0949) loss: 0.8173 (0.8175) time: 0.1453 data: 0.0629 max mem: 9377 +Train: [51] [5900/6250] eta: 0:00:52 lr: 0.000064 grad: 0.0882 (0.0948) loss: 0.8157 (0.8175) time: 0.1730 data: 0.0834 max mem: 9377 +Train: [51] [6000/6250] eta: 0:00:37 lr: 0.000064 grad: 0.0851 (0.0948) loss: 0.8256 (0.8175) time: 0.2039 data: 0.1242 max mem: 9377 +Train: [51] [6100/6250] eta: 0:00:22 lr: 0.000064 grad: 0.0950 (0.0948) loss: 0.8130 (0.8174) time: 0.1215 data: 0.0411 max mem: 9377 +Train: [51] [6200/6250] eta: 0:00:07 lr: 0.000064 grad: 0.0920 (0.0948) loss: 0.8162 (0.8174) time: 0.1425 data: 0.0548 max mem: 9377 +Train: [51] [6249/6250] eta: 0:00:00 lr: 0.000064 grad: 0.0869 (0.0948) loss: 0.8158 (0.8173) time: 0.1240 data: 0.0472 max mem: 9377 +Train: [51] Total time: 0:15:48 (0.1518 s / it) +Averaged stats: lr: 0.000064 grad: 0.0869 (0.0948) loss: 0.8158 (0.8173) +Eval (hcp-train-subset): [51] [ 0/62] eta: 0:05:47 loss: 0.8318 (0.8318) time: 5.5990 data: 5.5680 max mem: 9377 +Eval (hcp-train-subset): [51] [61/62] eta: 0:00:00 loss: 0.8313 (0.8313) time: 0.1495 data: 0.1243 max mem: 9377 +Eval (hcp-train-subset): [51] Total time: 0:00:14 (0.2372 s / it) +Averaged stats (hcp-train-subset): loss: 0.8313 (0.8313) +Eval (hcp-val): [51] [ 0/62] eta: 0:03:31 loss: 0.8319 (0.8319) time: 3.4174 data: 3.3557 max mem: 9377 +Eval (hcp-val): [51] [61/62] eta: 0:00:00 loss: 0.8327 (0.8355) time: 0.1450 data: 0.1174 max mem: 9377 +Eval (hcp-val): [51] Total time: 0:00:13 (0.2213 s / it) +Averaged stats (hcp-val): loss: 0.8327 (0.8355) +Eval (nsd-val): [51] [ 0/62] eta: 0:04:26 loss: 0.7992 (0.7992) time: 4.3049 data: 4.2606 max mem: 9377 +Eval (nsd-val): [51] [61/62] eta: 0:00:00 loss: 0.8118 (0.8123) time: 0.1435 data: 0.1168 max mem: 9377 +Eval (nsd-val): [51] Total time: 0:00:13 (0.2161 s / it) +Averaged stats (nsd-val): loss: 0.8118 (0.8123) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +Train: [52] [ 0/6250] eta: 9:20:33 lr: 0.000064 grad: 0.0692 (0.0692) loss: 0.8422 (0.8422) time: 5.3814 data: 5.2781 max mem: 9377 +Train: [52] [ 100/6250] eta: 0:20:21 lr: 0.000063 grad: 0.0916 (0.1064) loss: 0.8302 (0.8360) time: 0.1317 data: 0.0169 max mem: 9377 +Train: [52] [ 200/6250] eta: 0:18:07 lr: 0.000063 grad: 0.0930 (0.1051) loss: 0.8250 (0.8282) time: 0.1718 data: 0.0736 max mem: 9377 +Train: [52] [ 300/6250] eta: 0:16:37 lr: 0.000063 grad: 0.0884 (0.1026) loss: 0.8098 (0.8252) time: 0.1422 data: 0.0506 max mem: 9377 +Train: [52] [ 400/6250] eta: 0:15:47 lr: 0.000063 grad: 0.0902 (0.1006) loss: 0.8248 (0.8241) time: 0.1482 data: 0.0634 max mem: 9377 +Train: [52] [ 500/6250] eta: 0:14:58 lr: 0.000063 grad: 0.0933 (0.0996) loss: 0.8099 (0.8234) time: 0.1382 data: 0.0458 max mem: 9377 +Train: [52] [ 600/6250] eta: 0:14:21 lr: 0.000063 grad: 0.0897 (0.0986) loss: 0.8162 (0.8226) time: 0.1239 data: 0.0220 max mem: 9377 +Train: [52] [ 700/6250] eta: 0:13:53 lr: 0.000063 grad: 0.0900 (0.0981) loss: 0.8168 (0.8218) time: 0.1451 data: 0.0511 max mem: 9377 +Train: [52] [ 800/6250] eta: 0:13:31 lr: 0.000063 grad: 0.0968 (0.0976) loss: 0.8237 (0.8214) time: 0.1366 data: 0.0398 max mem: 9377 +Train: [52] [ 900/6250] eta: 0:13:12 lr: 0.000063 grad: 0.0890 (0.0973) loss: 0.8199 (0.8211) time: 0.1478 data: 0.0558 max mem: 9377 +Train: [52] [1000/6250] eta: 0:12:51 lr: 0.000063 grad: 0.0868 (0.0968) loss: 0.8150 (0.8209) time: 0.1386 data: 0.0503 max mem: 9377 +Train: [52] [1100/6250] eta: 0:12:32 lr: 0.000063 grad: 0.0924 (0.0967) loss: 0.8175 (0.8203) time: 0.1343 data: 0.0494 max mem: 9377 +Train: [52] [1200/6250] eta: 0:12:15 lr: 0.000063 grad: 0.0888 (0.0965) loss: 0.8197 (0.8199) time: 0.1365 data: 0.0535 max mem: 9377 +Train: [52] [1300/6250] eta: 0:11:59 lr: 0.000063 grad: 0.0932 (0.0963) loss: 0.8113 (0.8196) time: 0.1452 data: 0.0621 max mem: 9377 +Train: [52] [1400/6250] eta: 0:11:57 lr: 0.000063 grad: 0.1004 (0.0963) loss: 0.8071 (0.8189) time: 0.1593 data: 0.0802 max mem: 9377 +Train: [52] [1500/6250] eta: 0:11:43 lr: 0.000063 grad: 0.1006 (0.0971) loss: 0.8135 (0.8184) time: 0.1158 data: 0.0326 max mem: 9377 +Train: [52] [1600/6250] eta: 0:11:31 lr: 0.000063 grad: 0.0907 (0.0972) loss: 0.8155 (0.8181) time: 0.1652 data: 0.0887 max mem: 9377 +Train: [52] [1700/6250] eta: 0:11:17 lr: 0.000063 grad: 0.0941 (0.0973) loss: 0.8143 (0.8177) time: 0.1542 data: 0.0642 max mem: 9377 +Train: [52] [1800/6250] eta: 0:11:02 lr: 0.000063 grad: 0.0978 (0.0974) loss: 0.8064 (0.8173) time: 0.1290 data: 0.0403 max mem: 9377 +Train: [52] [1900/6250] eta: 0:10:48 lr: 0.000063 grad: 0.0943 (0.0974) loss: 0.8188 (0.8169) time: 0.1590 data: 0.0781 max mem: 9377 +Train: [52] [2000/6250] eta: 0:10:34 lr: 0.000063 grad: 0.0988 (0.0976) loss: 0.8118 (0.8166) time: 0.1652 data: 0.0858 max mem: 9377 +Train: [52] [2100/6250] eta: 0:10:17 lr: 0.000063 grad: 0.0911 (0.0975) loss: 0.8127 (0.8165) time: 0.1432 data: 0.0589 max mem: 9377 +Train: [52] [2200/6250] eta: 0:10:02 lr: 0.000063 grad: 0.0950 (0.0978) loss: 0.8131 (0.8162) time: 0.1308 data: 0.0317 max mem: 9377 +Train: [52] [2300/6250] eta: 0:09:45 lr: 0.000063 grad: 0.0943 (0.0979) loss: 0.8151 (0.8160) time: 0.1477 data: 0.0619 max mem: 9377 +Train: [52] [2400/6250] eta: 0:09:28 lr: 0.000063 grad: 0.0950 (0.0979) loss: 0.8141 (0.8159) time: 0.1321 data: 0.0399 max mem: 9377 +Train: [52] [2500/6250] eta: 0:09:12 lr: 0.000063 grad: 0.0912 (0.0977) loss: 0.8161 (0.8159) time: 0.1410 data: 0.0563 max mem: 9377 +Train: [52] [2600/6250] eta: 0:08:57 lr: 0.000063 grad: 0.0873 (0.0976) loss: 0.8116 (0.8158) time: 0.1154 data: 0.0315 max mem: 9377 +Train: [52] [2700/6250] eta: 0:08:41 lr: 0.000063 grad: 0.0902 (0.0975) loss: 0.8203 (0.8157) time: 0.1323 data: 0.0444 max mem: 9377 +Train: [52] [2800/6250] eta: 0:08:26 lr: 0.000063 grad: 0.0916 (0.0974) loss: 0.8150 (0.8157) time: 0.1549 data: 0.0744 max mem: 9377 +Train: [52] [2900/6250] eta: 0:08:10 lr: 0.000063 grad: 0.0896 (0.0975) loss: 0.8201 (0.8158) time: 0.1424 data: 0.0601 max mem: 9377 +Train: [52] [3000/6250] eta: 0:07:55 lr: 0.000063 grad: 0.0980 (0.0973) loss: 0.8121 (0.8158) time: 0.1309 data: 0.0445 max mem: 9377 +Train: [52] [3100/6250] eta: 0:07:41 lr: 0.000063 grad: 0.0908 (0.0973) loss: 0.8224 (0.8159) time: 0.1620 data: 0.0824 max mem: 9377 +Train: [52] [3200/6250] eta: 0:07:26 lr: 0.000062 grad: 0.0925 (0.0972) loss: 0.8196 (0.8160) time: 0.1463 data: 0.0641 max mem: 9377 +Train: [52] [3300/6250] eta: 0:07:11 lr: 0.000062 grad: 0.0890 (0.0972) loss: 0.8187 (0.8160) time: 0.1802 data: 0.1029 max mem: 9377 +Train: [52] [3400/6250] eta: 0:06:56 lr: 0.000062 grad: 0.0930 (0.0971) loss: 0.8151 (0.8160) time: 0.1487 data: 0.0689 max mem: 9377 +Train: [52] [3500/6250] eta: 0:06:43 lr: 0.000062 grad: 0.0926 (0.0970) loss: 0.8145 (0.8161) time: 0.1403 data: 0.0612 max mem: 9377 +Train: [52] [3600/6250] eta: 0:06:29 lr: 0.000062 grad: 0.0927 (0.0971) loss: 0.8205 (0.8161) time: 0.1531 data: 0.0670 max mem: 9377 +Train: [52] [3700/6250] eta: 0:06:14 lr: 0.000062 grad: 0.0900 (0.0970) loss: 0.8203 (0.8161) time: 0.1362 data: 0.0551 max mem: 9377 +Train: [52] [3800/6250] eta: 0:06:00 lr: 0.000062 grad: 0.0909 (0.0970) loss: 0.8173 (0.8160) time: 0.1497 data: 0.0667 max mem: 9377 +Train: [52] [3900/6250] eta: 0:05:45 lr: 0.000062 grad: 0.0889 (0.0970) loss: 0.8227 (0.8161) time: 0.1846 data: 0.1008 max mem: 9377 +Train: [52] [4000/6250] eta: 0:05:30 lr: 0.000062 grad: 0.0952 (0.0970) loss: 0.8184 (0.8162) time: 0.0978 data: 0.0101 max mem: 9377 +Train: [52] [4100/6250] eta: 0:05:16 lr: 0.000062 grad: 0.0970 (0.0970) loss: 0.8201 (0.8162) time: 0.1510 data: 0.0697 max mem: 9377 +Train: [52] [4200/6250] eta: 0:05:01 lr: 0.000062 grad: 0.0939 (0.0970) loss: 0.8104 (0.8162) time: 0.1568 data: 0.0776 max mem: 9377 +Train: [52] [4300/6250] eta: 0:04:46 lr: 0.000062 grad: 0.0910 (0.0969) loss: 0.8227 (0.8163) time: 0.1572 data: 0.0738 max mem: 9377 +Train: [52] [4400/6250] eta: 0:04:32 lr: 0.000062 grad: 0.0953 (0.0969) loss: 0.8217 (0.8163) time: 0.1653 data: 0.0832 max mem: 9377 +Train: [52] [4500/6250] eta: 0:04:17 lr: 0.000062 grad: 0.0932 (0.0969) loss: 0.8202 (0.8164) time: 0.1521 data: 0.0733 max mem: 9377 +Train: [52] [4600/6250] eta: 0:04:03 lr: 0.000062 grad: 0.0985 (0.0969) loss: 0.8104 (0.8163) time: 0.1342 data: 0.0530 max mem: 9377 +Train: [52] [4700/6250] eta: 0:03:48 lr: 0.000062 grad: 0.0902 (0.0968) loss: 0.8179 (0.8164) time: 0.1439 data: 0.0675 max mem: 9377 +Train: [52] [4800/6250] eta: 0:03:33 lr: 0.000062 grad: 0.0966 (0.0968) loss: 0.8236 (0.8164) time: 0.1319 data: 0.0428 max mem: 9377 +Train: [52] [4900/6250] eta: 0:03:18 lr: 0.000062 grad: 0.0957 (0.0968) loss: 0.8134 (0.8164) time: 0.1474 data: 0.0645 max mem: 9377 +Train: [52] [5000/6250] eta: 0:03:04 lr: 0.000062 grad: 0.0951 (0.0967) loss: 0.8131 (0.8164) time: 0.1474 data: 0.0587 max mem: 9377 +Train: [52] [5100/6250] eta: 0:02:49 lr: 0.000062 grad: 0.0944 (0.0967) loss: 0.8105 (0.8164) time: 0.1288 data: 0.0473 max mem: 9377 +Train: [52] [5200/6250] eta: 0:02:34 lr: 0.000062 grad: 0.0878 (0.0967) loss: 0.8212 (0.8164) time: 0.1494 data: 0.0630 max mem: 9377 +Train: [52] [5300/6250] eta: 0:02:19 lr: 0.000062 grad: 0.0947 (0.0967) loss: 0.8158 (0.8164) time: 0.1363 data: 0.0493 max mem: 9377 +Train: [52] [5400/6250] eta: 0:02:04 lr: 0.000062 grad: 0.0976 (0.0967) loss: 0.8167 (0.8164) time: 0.1230 data: 0.0361 max mem: 9377 +Train: [52] [5500/6250] eta: 0:01:50 lr: 0.000062 grad: 0.0960 (0.0970) loss: 0.8158 (0.8164) time: 0.1268 data: 0.0439 max mem: 9377 +Train: [52] [5600/6250] eta: 0:01:35 lr: 0.000062 grad: 0.0896 (0.0970) loss: 0.8192 (0.8164) time: 0.1501 data: 0.0693 max mem: 9377 +Train: [52] [5700/6250] eta: 0:01:20 lr: 0.000062 grad: 0.0889 (0.0969) loss: 0.8176 (0.8164) time: 0.1456 data: 0.0616 max mem: 9377 +Train: [52] [5800/6250] eta: 0:01:05 lr: 0.000062 grad: 0.0879 (0.0969) loss: 0.8136 (0.8164) time: 0.1479 data: 0.0690 max mem: 9377 +Train: [52] [5900/6250] eta: 0:00:51 lr: 0.000062 grad: 0.0979 (0.0969) loss: 0.8115 (0.8163) time: 0.1529 data: 0.0681 max mem: 9377 +Train: [52] [6000/6250] eta: 0:00:36 lr: 0.000062 grad: 0.0968 (0.0969) loss: 0.8186 (0.8163) time: 0.1451 data: 0.0581 max mem: 9377 +Train: [52] [6100/6250] eta: 0:00:22 lr: 0.000062 grad: 0.0953 (0.0970) loss: 0.8142 (0.8163) time: 0.1581 data: 0.0677 max mem: 9377 +Train: [52] [6200/6250] eta: 0:00:07 lr: 0.000061 grad: 0.0958 (0.0971) loss: 0.8145 (0.8163) time: 0.1604 data: 0.0771 max mem: 9377 +Train: [52] [6249/6250] eta: 0:00:00 lr: 0.000061 grad: 0.0929 (0.0971) loss: 0.8125 (0.8163) time: 0.1512 data: 0.0744 max mem: 9377 +Train: [52] Total time: 0:15:24 (0.1479 s / it) +Averaged stats: lr: 0.000061 grad: 0.0929 (0.0971) loss: 0.8125 (0.8163) +Eval (hcp-train-subset): [52] [ 0/62] eta: 0:03:28 loss: 0.8300 (0.8300) time: 3.3704 data: 3.3067 max mem: 9377 +Eval (hcp-train-subset): [52] [61/62] eta: 0:00:00 loss: 0.8321 (0.8310) time: 0.1381 data: 0.1100 max mem: 9377 +Eval (hcp-train-subset): [52] Total time: 0:00:13 (0.2122 s / it) +Averaged stats (hcp-train-subset): loss: 0.8321 (0.8310) +Eval (hcp-val): [52] [ 0/62] eta: 0:05:27 loss: 0.8312 (0.8312) time: 5.2776 data: 5.2468 max mem: 9377 +Eval (hcp-val): [52] [61/62] eta: 0:00:00 loss: 0.8342 (0.8344) time: 0.1497 data: 0.1230 max mem: 9377 +Eval (hcp-val): [52] Total time: 0:00:13 (0.2143 s / it) +Averaged stats (hcp-val): loss: 0.8342 (0.8344) +Eval (nsd-val): [52] [ 0/62] eta: 0:04:41 loss: 0.8055 (0.8055) time: 4.5343 data: 4.5037 max mem: 9377 +Eval (nsd-val): [52] [61/62] eta: 0:00:00 loss: 0.8126 (0.8147) time: 0.1344 data: 0.1090 max mem: 9377 +Eval (nsd-val): [52] Total time: 0:00:13 (0.2140 s / it) +Averaged stats (nsd-val): loss: 0.8126 (0.8147) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +Train: [53] [ 0/6250] eta: 9:35:43 lr: 0.000061 grad: 0.1726 (0.1726) loss: 0.8417 (0.8417) time: 5.5270 data: 5.2491 max mem: 9377 +Train: [53] [ 100/6250] eta: 0:22:58 lr: 0.000061 grad: 0.0950 (0.1119) loss: 0.8290 (0.8269) time: 0.1530 data: 0.0452 max mem: 9377 +Train: [53] [ 200/6250] eta: 0:19:45 lr: 0.000061 grad: 0.0970 (0.1034) loss: 0.8234 (0.8265) time: 0.1614 data: 0.0643 max mem: 9377 +Train: [53] [ 300/6250] eta: 0:17:43 lr: 0.000061 grad: 0.0920 (0.1002) loss: 0.8171 (0.8270) time: 0.1382 data: 0.0414 max mem: 9377 +Train: [53] [ 400/6250] eta: 0:16:34 lr: 0.000061 grad: 0.0910 (0.0984) loss: 0.8096 (0.8253) time: 0.1544 data: 0.0693 max mem: 9377 +Train: [53] [ 500/6250] eta: 0:15:48 lr: 0.000061 grad: 0.0902 (0.0973) loss: 0.8254 (0.8242) time: 0.1403 data: 0.0516 max mem: 9377 +Train: [53] [ 600/6250] eta: 0:15:04 lr: 0.000061 grad: 0.0907 (0.0964) loss: 0.8175 (0.8237) time: 0.1443 data: 0.0466 max mem: 9377 +Train: [53] [ 700/6250] eta: 0:14:29 lr: 0.000061 grad: 0.0822 (0.0954) loss: 0.8252 (0.8238) time: 0.1335 data: 0.0261 max mem: 9377 +Train: [53] [ 800/6250] eta: 0:14:06 lr: 0.000061 grad: 0.0989 (0.0949) loss: 0.8153 (0.8236) time: 0.1561 data: 0.0581 max mem: 9377 +Train: [53] [ 900/6250] eta: 0:13:51 lr: 0.000061 grad: 0.0945 (0.0947) loss: 0.8239 (0.8236) time: 0.1729 data: 0.0850 max mem: 9377 +Train: [53] [1000/6250] eta: 0:13:34 lr: 0.000061 grad: 0.0897 (0.0945) loss: 0.8163 (0.8234) time: 0.1392 data: 0.0509 max mem: 9377 +Train: [53] [1100/6250] eta: 0:13:28 lr: 0.000061 grad: 0.0898 (0.0943) loss: 0.8150 (0.8232) time: 0.1817 data: 0.0903 max mem: 9377 +Train: [53] [1200/6250] eta: 0:13:09 lr: 0.000061 grad: 0.0904 (0.0944) loss: 0.8170 (0.8227) time: 0.1802 data: 0.0920 max mem: 9377 +Train: [53] [1300/6250] eta: 0:12:54 lr: 0.000061 grad: 0.0889 (0.0942) loss: 0.8188 (0.8224) time: 0.1665 data: 0.0813 max mem: 9377 +Train: [53] [1400/6250] eta: 0:12:36 lr: 0.000061 grad: 0.0905 (0.0942) loss: 0.8185 (0.8221) time: 0.1376 data: 0.0547 max mem: 9377 +Train: [53] [1500/6250] eta: 0:12:24 lr: 0.000061 grad: 0.0924 (0.0944) loss: 0.8158 (0.8218) time: 0.1660 data: 0.0862 max mem: 9377 +Train: [53] [1600/6250] eta: 0:12:10 lr: 0.000061 grad: 0.0918 (0.0943) loss: 0.8130 (0.8215) time: 0.1515 data: 0.0598 max mem: 9377 +Train: [53] [1700/6250] eta: 0:11:55 lr: 0.000061 grad: 0.0938 (0.0943) loss: 0.8150 (0.8212) time: 0.1514 data: 0.0698 max mem: 9377 +Train: [53] [1800/6250] eta: 0:11:38 lr: 0.000061 grad: 0.0936 (0.0946) loss: 0.8156 (0.8210) time: 0.1586 data: 0.0689 max mem: 9377 +Train: [53] [1900/6250] eta: 0:11:19 lr: 0.000061 grad: 0.0951 (0.0947) loss: 0.8164 (0.8207) time: 0.1360 data: 0.0497 max mem: 9377 +Train: [53] [2000/6250] eta: 0:11:02 lr: 0.000061 grad: 0.0913 (0.0946) loss: 0.8169 (0.8206) time: 0.1513 data: 0.0683 max mem: 9377 +Train: [53] [2100/6250] eta: 0:10:45 lr: 0.000061 grad: 0.0897 (0.0947) loss: 0.8175 (0.8204) time: 0.1410 data: 0.0535 max mem: 9377 +Train: [53] [2200/6250] eta: 0:10:29 lr: 0.000061 grad: 0.0880 (0.0946) loss: 0.8217 (0.8203) time: 0.1565 data: 0.0718 max mem: 9377 +Train: [53] [2300/6250] eta: 0:10:11 lr: 0.000061 grad: 0.0935 (0.0946) loss: 0.8195 (0.8201) time: 0.1393 data: 0.0572 max mem: 9377 +Train: [53] [2400/6250] eta: 0:09:55 lr: 0.000061 grad: 0.0896 (0.0946) loss: 0.8123 (0.8200) time: 0.1599 data: 0.0748 max mem: 9377 +Train: [53] [2500/6250] eta: 0:09:37 lr: 0.000061 grad: 0.0924 (0.0946) loss: 0.8155 (0.8199) time: 0.1294 data: 0.0491 max mem: 9377 +Train: [53] [2600/6250] eta: 0:09:19 lr: 0.000061 grad: 0.0936 (0.0948) loss: 0.8203 (0.8197) time: 0.1149 data: 0.0260 max mem: 9377 +Train: [53] [2700/6250] eta: 0:09:01 lr: 0.000061 grad: 0.0887 (0.0949) loss: 0.8152 (0.8194) time: 0.1274 data: 0.0373 max mem: 9377 +Train: [53] [2800/6250] eta: 0:08:45 lr: 0.000061 grad: 0.1012 (0.0950) loss: 0.8076 (0.8193) time: 0.1540 data: 0.0633 max mem: 9377 +Train: [53] [2900/6250] eta: 0:08:28 lr: 0.000061 grad: 0.0963 (0.0951) loss: 0.8089 (0.8190) time: 0.1253 data: 0.0347 max mem: 9377 +Train: [53] [3000/6250] eta: 0:08:12 lr: 0.000060 grad: 0.0973 (0.0953) loss: 0.8146 (0.8187) time: 0.1408 data: 0.0532 max mem: 9377 +Train: [53] [3100/6250] eta: 0:07:56 lr: 0.000060 grad: 0.0988 (0.0954) loss: 0.8102 (0.8185) time: 0.1376 data: 0.0549 max mem: 9377 +Train: [53] [3200/6250] eta: 0:07:40 lr: 0.000060 grad: 0.0947 (0.0955) loss: 0.8050 (0.8182) time: 0.1316 data: 0.0499 max mem: 9377 +Train: [53] [3300/6250] eta: 0:07:24 lr: 0.000060 grad: 0.0953 (0.0956) loss: 0.8092 (0.8180) time: 0.1403 data: 0.0562 max mem: 9377 +Train: [53] [3400/6250] eta: 0:07:09 lr: 0.000060 grad: 0.0912 (0.0958) loss: 0.8209 (0.8179) time: 0.1335 data: 0.0435 max mem: 9377 +Train: [53] [3500/6250] eta: 0:06:53 lr: 0.000060 grad: 0.0950 (0.0960) loss: 0.8133 (0.8177) time: 0.1417 data: 0.0604 max mem: 9377 +Train: [53] [3600/6250] eta: 0:06:37 lr: 0.000060 grad: 0.1011 (0.0961) loss: 0.8114 (0.8175) time: 0.1446 data: 0.0596 max mem: 9377 +Train: [53] [3700/6250] eta: 0:06:21 lr: 0.000060 grad: 0.1005 (0.0962) loss: 0.8170 (0.8174) time: 0.1405 data: 0.0584 max mem: 9377 +Train: [53] [3800/6250] eta: 0:06:06 lr: 0.000060 grad: 0.0961 (0.0964) loss: 0.8147 (0.8173) time: 0.1310 data: 0.0486 max mem: 9377 +Train: [53] [3900/6250] eta: 0:05:50 lr: 0.000060 grad: 0.0943 (0.0963) loss: 0.8151 (0.8172) time: 0.1568 data: 0.0789 max mem: 9377 +Train: [53] [4000/6250] eta: 0:05:35 lr: 0.000060 grad: 0.0988 (0.0963) loss: 0.8214 (0.8172) time: 0.1317 data: 0.0535 max mem: 9377 +Train: [53] [4100/6250] eta: 0:05:20 lr: 0.000060 grad: 0.0925 (0.0963) loss: 0.8209 (0.8172) time: 0.1525 data: 0.0649 max mem: 9377 +Train: [53] [4200/6250] eta: 0:05:04 lr: 0.000060 grad: 0.0921 (0.0964) loss: 0.8153 (0.8172) time: 0.1439 data: 0.0596 max mem: 9377 +Train: [53] [4300/6250] eta: 0:04:49 lr: 0.000060 grad: 0.0929 (0.0965) loss: 0.8166 (0.8171) time: 0.1536 data: 0.0776 max mem: 9377 +Train: [53] [4400/6250] eta: 0:04:34 lr: 0.000060 grad: 0.0918 (0.0965) loss: 0.8168 (0.8171) time: 0.1402 data: 0.0530 max mem: 9377 +Train: [53] [4500/6250] eta: 0:04:19 lr: 0.000060 grad: 0.0965 (0.0966) loss: 0.8222 (0.8172) time: 0.1427 data: 0.0601 max mem: 9377 +Train: [53] [4600/6250] eta: 0:04:04 lr: 0.000060 grad: 0.0862 (0.0966) loss: 0.8221 (0.8172) time: 0.1475 data: 0.0611 max mem: 9377 +Train: [53] [4700/6250] eta: 0:03:50 lr: 0.000060 grad: 0.0956 (0.0966) loss: 0.8169 (0.8172) time: 0.1784 data: 0.0974 max mem: 9377 +Train: [53] [4800/6250] eta: 0:03:35 lr: 0.000060 grad: 0.0896 (0.0966) loss: 0.8142 (0.8172) time: 0.1409 data: 0.0600 max mem: 9377 +Train: [53] [4900/6250] eta: 0:03:20 lr: 0.000060 grad: 0.0958 (0.0965) loss: 0.8144 (0.8172) time: 0.1318 data: 0.0433 max mem: 9377 +Train: [53] [5000/6250] eta: 0:03:06 lr: 0.000060 grad: 0.1008 (0.0966) loss: 0.8196 (0.8172) time: 0.1252 data: 0.0465 max mem: 9377 +Train: [53] [5100/6250] eta: 0:02:51 lr: 0.000060 grad: 0.0915 (0.0966) loss: 0.8165 (0.8172) time: 0.1562 data: 0.0780 max mem: 9377 +Train: [53] [5200/6250] eta: 0:02:36 lr: 0.000060 grad: 0.0946 (0.0966) loss: 0.8175 (0.8173) time: 0.1740 data: 0.0996 max mem: 9377 +Train: [53] [5300/6250] eta: 0:02:21 lr: 0.000060 grad: 0.0926 (0.0966) loss: 0.8224 (0.8173) time: 0.1467 data: 0.0740 max mem: 9377 +Train: [53] [5400/6250] eta: 0:02:06 lr: 0.000060 grad: 0.0930 (0.0966) loss: 0.8217 (0.8174) time: 0.1573 data: 0.0749 max mem: 9377 +Train: [53] [5500/6250] eta: 0:01:51 lr: 0.000060 grad: 0.0968 (0.0966) loss: 0.8188 (0.8174) time: 0.1325 data: 0.0468 max mem: 9377 +Train: [53] [5600/6250] eta: 0:01:36 lr: 0.000060 grad: 0.0940 (0.0966) loss: 0.8190 (0.8174) time: 0.1563 data: 0.0703 max mem: 9377 +Train: [53] [5700/6250] eta: 0:01:21 lr: 0.000060 grad: 0.0912 (0.0966) loss: 0.8181 (0.8174) time: 0.1667 data: 0.0873 max mem: 9377 +Train: [53] [5800/6250] eta: 0:01:07 lr: 0.000060 grad: 0.1041 (0.0966) loss: 0.8085 (0.8174) time: 0.1396 data: 0.0575 max mem: 9377 +Train: [53] [5900/6250] eta: 0:00:52 lr: 0.000060 grad: 0.0989 (0.0967) loss: 0.8117 (0.8173) time: 0.1545 data: 0.0789 max mem: 9377 +Train: [53] [6000/6250] eta: 0:00:37 lr: 0.000059 grad: 0.1005 (0.0967) loss: 0.8145 (0.8173) time: 0.1501 data: 0.0664 max mem: 9377 +Train: [53] [6100/6250] eta: 0:00:22 lr: 0.000059 grad: 0.0964 (0.0968) loss: 0.8174 (0.8172) time: 0.1387 data: 0.0582 max mem: 9377 +Train: [53] [6200/6250] eta: 0:00:07 lr: 0.000059 grad: 0.0930 (0.0968) loss: 0.8118 (0.8171) time: 0.1617 data: 0.0830 max mem: 9377 +Train: [53] [6249/6250] eta: 0:00:00 lr: 0.000059 grad: 0.0967 (0.0968) loss: 0.8141 (0.8171) time: 0.1845 data: 0.1103 max mem: 9377 +Train: [53] Total time: 0:15:38 (0.1502 s / it) +Averaged stats: lr: 0.000059 grad: 0.0967 (0.0968) loss: 0.8141 (0.8171) +Eval (hcp-train-subset): [53] [ 0/62] eta: 0:04:09 loss: 0.8348 (0.8348) time: 4.0195 data: 3.9413 max mem: 9377 +Eval (hcp-train-subset): [53] [61/62] eta: 0:00:00 loss: 0.8312 (0.8314) time: 0.1385 data: 0.1131 max mem: 9377 +Eval (hcp-train-subset): [53] Total time: 0:00:13 (0.2174 s / it) +Averaged stats (hcp-train-subset): loss: 0.8312 (0.8314) +Eval (hcp-val): [53] [ 0/62] eta: 0:05:27 loss: 0.8322 (0.8322) time: 5.2799 data: 5.2487 max mem: 9377 +Eval (hcp-val): [53] [61/62] eta: 0:00:00 loss: 0.8347 (0.8355) time: 0.1312 data: 0.1037 max mem: 9377 +Eval (hcp-val): [53] Total time: 0:00:12 (0.2096 s / it) +Averaged stats (hcp-val): loss: 0.8347 (0.8355) +Eval (nsd-val): [53] [ 0/62] eta: 0:04:19 loss: 0.8052 (0.8052) time: 4.1933 data: 4.1244 max mem: 9377 +Eval (nsd-val): [53] [61/62] eta: 0:00:00 loss: 0.8103 (0.8138) time: 0.1165 data: 0.0914 max mem: 9377 +Eval (nsd-val): [53] Total time: 0:00:12 (0.2074 s / it) +Averaged stats (nsd-val): loss: 0.8103 (0.8138) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +Train: [54] [ 0/6250] eta: 10:17:49 lr: 0.000059 grad: 0.0858 (0.0858) loss: 0.8393 (0.8393) time: 5.9311 data: 5.8396 max mem: 9377 +Train: [54] [ 100/6250] eta: 0:19:42 lr: 0.000059 grad: 0.0862 (0.0918) loss: 0.8332 (0.8349) time: 0.1511 data: 0.0448 max mem: 9377 +Train: [54] [ 200/6250] eta: 0:16:54 lr: 0.000059 grad: 0.0971 (0.0942) loss: 0.8186 (0.8278) time: 0.1206 data: 0.0316 max mem: 9377 +Train: [54] [ 300/6250] eta: 0:15:48 lr: 0.000059 grad: 0.0923 (0.0963) loss: 0.8206 (0.8227) time: 0.1285 data: 0.0408 max mem: 9377 +Train: [54] [ 400/6250] eta: 0:15:11 lr: 0.000059 grad: 0.0892 (0.0957) loss: 0.8197 (0.8216) time: 0.1577 data: 0.0741 max mem: 9377 +Train: [54] [ 500/6250] eta: 0:14:28 lr: 0.000059 grad: 0.0926 (0.0958) loss: 0.8151 (0.8201) time: 0.1242 data: 0.0342 max mem: 9377 +Train: [54] [ 600/6250] eta: 0:14:04 lr: 0.000059 grad: 0.0925 (0.0957) loss: 0.8091 (0.8193) time: 0.1291 data: 0.0342 max mem: 9377 +Train: [54] [ 700/6250] eta: 0:13:43 lr: 0.000059 grad: 0.0882 (0.0953) loss: 0.8243 (0.8192) time: 0.1452 data: 0.0573 max mem: 9377 +Train: [54] [ 800/6250] eta: 0:13:21 lr: 0.000059 grad: 0.0980 (0.0957) loss: 0.8147 (0.8185) time: 0.1354 data: 0.0469 max mem: 9377 +Train: [54] [ 900/6250] eta: 0:13:09 lr: 0.000059 grad: 0.0859 (0.0956) loss: 0.8197 (0.8183) time: 0.1979 data: 0.1194 max mem: 9377 +Train: [54] [1000/6250] eta: 0:13:00 lr: 0.000059 grad: 0.0912 (0.0955) loss: 0.8149 (0.8182) time: 0.1340 data: 0.0500 max mem: 9377 +Train: [54] [1100/6250] eta: 0:12:48 lr: 0.000059 grad: 0.0970 (0.0955) loss: 0.8139 (0.8179) time: 0.1552 data: 0.0666 max mem: 9377 +Train: [54] [1200/6250] eta: 0:12:29 lr: 0.000059 grad: 0.0996 (0.0959) loss: 0.8098 (0.8174) time: 0.1379 data: 0.0585 max mem: 9377 +Train: [54] [1300/6250] eta: 0:12:12 lr: 0.000059 grad: 0.0932 (0.0960) loss: 0.8201 (0.8171) time: 0.1452 data: 0.0613 max mem: 9377 +Train: [54] [1400/6250] eta: 0:11:56 lr: 0.000059 grad: 0.0997 (0.0961) loss: 0.8165 (0.8169) time: 0.1482 data: 0.0605 max mem: 9377 +Train: [54] [1500/6250] eta: 0:11:39 lr: 0.000059 grad: 0.0973 (0.0963) loss: 0.8108 (0.8164) time: 0.1383 data: 0.0537 max mem: 9377 +Train: [54] [1600/6250] eta: 0:11:22 lr: 0.000059 grad: 0.0964 (0.0965) loss: 0.8045 (0.8161) time: 0.1386 data: 0.0559 max mem: 9377 +Train: [54] [1700/6250] eta: 0:11:03 lr: 0.000059 grad: 0.0933 (0.0966) loss: 0.8120 (0.8158) time: 0.1034 data: 0.0075 max mem: 9377 +Train: [54] [1800/6250] eta: 0:10:45 lr: 0.000059 grad: 0.0909 (0.0966) loss: 0.8169 (0.8156) time: 0.1214 data: 0.0292 max mem: 9377 +Train: [54] [1900/6250] eta: 0:10:27 lr: 0.000059 grad: 0.0945 (0.0966) loss: 0.8119 (0.8156) time: 0.1195 data: 0.0350 max mem: 9377 +Train: [54] [2000/6250] eta: 0:10:10 lr: 0.000059 grad: 0.1042 (0.0968) loss: 0.8117 (0.8154) time: 0.1263 data: 0.0449 max mem: 9377 +Train: [54] [2100/6250] eta: 0:09:56 lr: 0.000059 grad: 0.0928 (0.0969) loss: 0.8131 (0.8153) time: 0.1527 data: 0.0674 max mem: 9377 +Train: [54] [2200/6250] eta: 0:09:41 lr: 0.000059 grad: 0.0916 (0.0970) loss: 0.8180 (0.8153) time: 0.1143 data: 0.0324 max mem: 9377 +Train: [54] [2300/6250] eta: 0:09:26 lr: 0.000059 grad: 0.0953 (0.0971) loss: 0.8168 (0.8152) time: 0.1526 data: 0.0687 max mem: 9377 +Train: [54] [2400/6250] eta: 0:09:12 lr: 0.000059 grad: 0.1006 (0.0973) loss: 0.8168 (0.8151) time: 0.1489 data: 0.0648 max mem: 9377 +Train: [54] [2500/6250] eta: 0:08:58 lr: 0.000059 grad: 0.0941 (0.0975) loss: 0.8168 (0.8150) time: 0.1485 data: 0.0660 max mem: 9377 +Train: [54] [2600/6250] eta: 0:08:43 lr: 0.000059 grad: 0.0956 (0.0976) loss: 0.8149 (0.8150) time: 0.1571 data: 0.0747 max mem: 9377 +Train: [54] [2700/6250] eta: 0:08:28 lr: 0.000059 grad: 0.0925 (0.0976) loss: 0.8149 (0.8150) time: 0.1359 data: 0.0494 max mem: 9377 +Train: [54] [2800/6250] eta: 0:08:13 lr: 0.000058 grad: 0.1005 (0.0978) loss: 0.8103 (0.8149) time: 0.1388 data: 0.0510 max mem: 9377 +Train: [54] [2900/6250] eta: 0:07:58 lr: 0.000058 grad: 0.0907 (0.0978) loss: 0.8116 (0.8148) time: 0.1238 data: 0.0427 max mem: 9377 +Train: [54] [3000/6250] eta: 0:07:44 lr: 0.000058 grad: 0.1056 (0.0979) loss: 0.8078 (0.8148) time: 0.1396 data: 0.0494 max mem: 9377 +Train: [54] [3100/6250] eta: 0:07:29 lr: 0.000058 grad: 0.1005 (0.0979) loss: 0.8146 (0.8147) time: 0.1571 data: 0.0798 max mem: 9377 +Train: [54] [3200/6250] eta: 0:07:14 lr: 0.000058 grad: 0.0944 (0.0980) loss: 0.8073 (0.8146) time: 0.1427 data: 0.0586 max mem: 9377 +Train: [54] [3300/6250] eta: 0:07:00 lr: 0.000058 grad: 0.1040 (0.0982) loss: 0.8125 (0.8145) time: 0.1371 data: 0.0582 max mem: 9377 +Train: [54] [3400/6250] eta: 0:06:45 lr: 0.000058 grad: 0.0970 (0.0982) loss: 0.8144 (0.8145) time: 0.1224 data: 0.0367 max mem: 9377 +Train: [54] [3500/6250] eta: 0:06:31 lr: 0.000058 grad: 0.0968 (0.0983) loss: 0.8169 (0.8144) time: 0.1449 data: 0.0643 max mem: 9377 +Train: [54] [3600/6250] eta: 0:06:16 lr: 0.000058 grad: 0.0945 (0.0983) loss: 0.8134 (0.8144) time: 0.1356 data: 0.0500 max mem: 9377 +Train: [54] [3700/6250] eta: 0:06:01 lr: 0.000058 grad: 0.0991 (0.0983) loss: 0.8119 (0.8144) time: 0.1438 data: 0.0582 max mem: 9377 +Train: [54] [3800/6250] eta: 0:05:47 lr: 0.000058 grad: 0.0933 (0.0983) loss: 0.8159 (0.8143) time: 0.1363 data: 0.0531 max mem: 9377 +Train: [54] [3900/6250] eta: 0:05:33 lr: 0.000058 grad: 0.0984 (0.0983) loss: 0.8161 (0.8143) time: 0.1453 data: 0.0601 max mem: 9377 +Train: [54] [4000/6250] eta: 0:05:19 lr: 0.000058 grad: 0.0943 (0.0983) loss: 0.8132 (0.8143) time: 0.1240 data: 0.0461 max mem: 9377 +Train: [54] [4100/6250] eta: 0:05:04 lr: 0.000058 grad: 0.0942 (0.0983) loss: 0.8163 (0.8143) time: 0.1304 data: 0.0429 max mem: 9377 +Train: [54] [4200/6250] eta: 0:04:50 lr: 0.000058 grad: 0.0931 (0.0984) loss: 0.8131 (0.8143) time: 0.1414 data: 0.0555 max mem: 9377 +Train: [54] [4300/6250] eta: 0:04:36 lr: 0.000058 grad: 0.0951 (0.0984) loss: 0.8127 (0.8142) time: 0.1507 data: 0.0691 max mem: 9377 +Train: [54] [4400/6250] eta: 0:04:21 lr: 0.000058 grad: 0.0990 (0.0984) loss: 0.8110 (0.8142) time: 0.1455 data: 0.0636 max mem: 9377 +Train: [54] [4500/6250] eta: 0:04:07 lr: 0.000058 grad: 0.0958 (0.0985) loss: 0.8153 (0.8141) time: 0.1331 data: 0.0507 max mem: 9377 +Train: [54] [4600/6250] eta: 0:03:52 lr: 0.000058 grad: 0.1004 (0.0986) loss: 0.8123 (0.8141) time: 0.1442 data: 0.0620 max mem: 9377 +Train: [54] [4700/6250] eta: 0:03:38 lr: 0.000058 grad: 0.1086 (0.0987) loss: 0.8047 (0.8139) time: 0.1350 data: 0.0419 max mem: 9377 +Train: [54] [4800/6250] eta: 0:03:24 lr: 0.000058 grad: 0.0950 (0.0988) loss: 0.8131 (0.8138) time: 0.1329 data: 0.0494 max mem: 9377 +Train: [54] [4900/6250] eta: 0:03:10 lr: 0.000058 grad: 0.1014 (0.0988) loss: 0.8100 (0.8137) time: 0.1225 data: 0.0390 max mem: 9377 +Train: [54] [5000/6250] eta: 0:02:56 lr: 0.000058 grad: 0.0962 (0.0989) loss: 0.8147 (0.8137) time: 0.1348 data: 0.0523 max mem: 9377 +Train: [54] [5100/6250] eta: 0:02:42 lr: 0.000058 grad: 0.0994 (0.0990) loss: 0.8105 (0.8137) time: 0.1446 data: 0.0630 max mem: 9377 +Train: [54] [5200/6250] eta: 0:02:28 lr: 0.000058 grad: 0.1060 (0.0990) loss: 0.8094 (0.8136) time: 0.1617 data: 0.0807 max mem: 9377 +Train: [54] [5300/6250] eta: 0:02:14 lr: 0.000058 grad: 0.0969 (0.0990) loss: 0.8110 (0.8136) time: 0.1493 data: 0.0648 max mem: 9377 +Train: [54] [5400/6250] eta: 0:01:59 lr: 0.000058 grad: 0.0950 (0.0990) loss: 0.8167 (0.8136) time: 0.1352 data: 0.0448 max mem: 9377 +Train: [54] [5500/6250] eta: 0:01:45 lr: 0.000058 grad: 0.0988 (0.0991) loss: 0.8167 (0.8136) time: 0.1378 data: 0.0577 max mem: 9377 +Train: [54] [5600/6250] eta: 0:01:31 lr: 0.000058 grad: 0.1007 (0.0991) loss: 0.8072 (0.8136) time: 0.1600 data: 0.0826 max mem: 9377 +Train: [54] [5700/6250] eta: 0:01:17 lr: 0.000058 grad: 0.0986 (0.0991) loss: 0.8083 (0.8136) time: 0.1171 data: 0.0375 max mem: 9377 +Train: [54] [5800/6250] eta: 0:01:03 lr: 0.000057 grad: 0.0970 (0.0991) loss: 0.8085 (0.8135) time: 0.1553 data: 0.0782 max mem: 9377 +Train: [54] [5900/6250] eta: 0:00:49 lr: 0.000057 grad: 0.0991 (0.0992) loss: 0.8135 (0.8135) time: 0.1598 data: 0.0770 max mem: 9377 +Train: [54] [6000/6250] eta: 0:00:35 lr: 0.000057 grad: 0.0987 (0.0993) loss: 0.8162 (0.8135) time: 0.1538 data: 0.0729 max mem: 9377 +Train: [54] [6100/6250] eta: 0:00:21 lr: 0.000057 grad: 0.1017 (0.0994) loss: 0.8113 (0.8135) time: 0.1100 data: 0.0260 max mem: 9377 +Train: [54] [6200/6250] eta: 0:00:07 lr: 0.000057 grad: 0.0929 (0.0994) loss: 0.8188 (0.8135) time: 0.1472 data: 0.0659 max mem: 9377 +Train: [54] [6249/6250] eta: 0:00:00 lr: 0.000057 grad: 0.1024 (0.0994) loss: 0.8074 (0.8135) time: 0.1461 data: 0.0619 max mem: 9377 +Train: [54] Total time: 0:14:51 (0.1427 s / it) +Averaged stats: lr: 0.000057 grad: 0.1024 (0.0994) loss: 0.8074 (0.8135) +Eval (hcp-train-subset): [54] [ 0/62] eta: 0:03:54 loss: 0.8310 (0.8310) time: 3.7803 data: 3.7156 max mem: 9377 +Eval (hcp-train-subset): [54] [61/62] eta: 0:00:00 loss: 0.8290 (0.8298) time: 0.1315 data: 0.1062 max mem: 9377 +Eval (hcp-train-subset): [54] Total time: 0:00:13 (0.2245 s / it) +Averaged stats (hcp-train-subset): loss: 0.8290 (0.8298) +Making plots (hcp-train-subset): example=29 +Eval (hcp-val): [54] [ 0/62] eta: 0:05:33 loss: 0.8327 (0.8327) time: 5.3803 data: 5.3490 max mem: 9377 +Eval (hcp-val): [54] [61/62] eta: 0:00:00 loss: 0.8324 (0.8341) time: 0.1455 data: 0.1187 max mem: 9377 +Eval (hcp-val): [54] Total time: 0:00:14 (0.2283 s / it) +Averaged stats (hcp-val): loss: 0.8324 (0.8341) +Making plots (hcp-val): example=49 +Eval (nsd-val): [54] [ 0/62] eta: 0:05:46 loss: 0.8012 (0.8012) time: 5.5847 data: 5.5535 max mem: 9377 +Eval (nsd-val): [54] [61/62] eta: 0:00:00 loss: 0.8099 (0.8124) time: 0.1397 data: 0.1122 max mem: 9377 +Eval (nsd-val): [54] Total time: 0:00:14 (0.2333 s / it) +Averaged stats (nsd-val): loss: 0.8099 (0.8124) +Making plots (nsd-val): example=47 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-00054.pth +Train: [55] [ 0/6250] eta: 12:12:37 lr: 0.000057 grad: 0.1148 (0.1148) loss: 0.8385 (0.8385) time: 7.0333 data: 6.9344 max mem: 9377 +Train: [55] [ 100/6250] eta: 0:20:47 lr: 0.000057 grad: 0.0808 (0.0992) loss: 0.8363 (0.8385) time: 0.1430 data: 0.0438 max mem: 9377 +Train: [55] [ 200/6250] eta: 0:17:08 lr: 0.000057 grad: 0.0865 (0.0979) loss: 0.8338 (0.8334) time: 0.1157 data: 0.0239 max mem: 9377 +Train: [55] [ 300/6250] eta: 0:15:50 lr: 0.000057 grad: 0.1010 (0.0986) loss: 0.8152 (0.8288) time: 0.1448 data: 0.0493 max mem: 9377 +Train: [55] [ 400/6250] eta: 0:15:07 lr: 0.000057 grad: 0.0913 (0.0975) loss: 0.8264 (0.8269) time: 0.1586 data: 0.0715 max mem: 9377 +Train: [55] [ 500/6250] eta: 0:14:52 lr: 0.000057 grad: 0.0936 (0.0973) loss: 0.8211 (0.8254) time: 0.1759 data: 0.0906 max mem: 9377 +Train: [55] [ 600/6250] eta: 0:14:37 lr: 0.000057 grad: 0.0948 (0.0970) loss: 0.8122 (0.8243) time: 0.1540 data: 0.0653 max mem: 9377 +Train: [55] [ 700/6250] eta: 0:14:32 lr: 0.000057 grad: 0.0937 (0.0973) loss: 0.8173 (0.8230) time: 0.1845 data: 0.0842 max mem: 9377 +Train: [55] [ 800/6250] eta: 0:14:17 lr: 0.000057 grad: 0.0948 (0.0970) loss: 0.8177 (0.8222) time: 0.1650 data: 0.0691 max mem: 9377 +Train: [55] [ 900/6250] eta: 0:14:01 lr: 0.000057 grad: 0.0942 (0.0971) loss: 0.8204 (0.8215) time: 0.1577 data: 0.0689 max mem: 9377 +Train: [55] [1000/6250] eta: 0:13:49 lr: 0.000057 grad: 0.0875 (0.0967) loss: 0.8249 (0.8212) time: 0.1710 data: 0.0842 max mem: 9377 +Train: [55] [1100/6250] eta: 0:13:38 lr: 0.000057 grad: 0.0924 (0.0966) loss: 0.8220 (0.8211) time: 0.1671 data: 0.0813 max mem: 9377 +Train: [55] [1200/6250] eta: 0:13:19 lr: 0.000057 grad: 0.0907 (0.0963) loss: 0.8213 (0.8209) time: 0.1433 data: 0.0591 max mem: 9377 +Train: [55] [1300/6250] eta: 0:12:57 lr: 0.000057 grad: 0.0947 (0.0963) loss: 0.8202 (0.8206) time: 0.1268 data: 0.0428 max mem: 9377 +Train: [55] [1400/6250] eta: 0:12:35 lr: 0.000057 grad: 0.0935 (0.0964) loss: 0.8142 (0.8202) time: 0.1345 data: 0.0531 max mem: 9377 +Train: [55] [1500/6250] eta: 0:12:14 lr: 0.000057 grad: 0.0889 (0.0963) loss: 0.8261 (0.8201) time: 0.1119 data: 0.0200 max mem: 9377 +Train: [55] [1600/6250] eta: 0:11:59 lr: 0.000057 grad: 0.0975 (0.0963) loss: 0.8187 (0.8200) time: 0.1409 data: 0.0607 max mem: 9377 +Train: [55] [1700/6250] eta: 0:11:42 lr: 0.000057 grad: 0.0926 (0.0962) loss: 0.8187 (0.8198) time: 0.1680 data: 0.0899 max mem: 9377 +Train: [55] [1800/6250] eta: 0:11:28 lr: 0.000057 grad: 0.0937 (0.0962) loss: 0.8179 (0.8197) time: 0.1799 data: 0.1049 max mem: 9377 +Train: [55] [1900/6250] eta: 0:11:15 lr: 0.000057 grad: 0.0894 (0.0963) loss: 0.8181 (0.8195) time: 0.1489 data: 0.0602 max mem: 9377 +Train: [55] [2000/6250] eta: 0:11:01 lr: 0.000057 grad: 0.0978 (0.0964) loss: 0.8112 (0.8193) time: 0.1443 data: 0.0603 max mem: 9377 +Train: [55] [2100/6250] eta: 0:10:49 lr: 0.000057 grad: 0.0953 (0.0965) loss: 0.8120 (0.8191) time: 0.2216 data: 0.1413 max mem: 9377 +Train: [55] [2200/6250] eta: 0:10:30 lr: 0.000057 grad: 0.0952 (0.0966) loss: 0.8169 (0.8189) time: 0.1269 data: 0.0490 max mem: 9377 +Train: [55] [2300/6250] eta: 0:10:15 lr: 0.000057 grad: 0.0970 (0.0966) loss: 0.8175 (0.8187) time: 0.1602 data: 0.0844 max mem: 9377 +Train: [55] [2400/6250] eta: 0:09:58 lr: 0.000057 grad: 0.0918 (0.0966) loss: 0.8190 (0.8185) time: 0.1307 data: 0.0470 max mem: 9377 +Train: [55] [2500/6250] eta: 0:09:40 lr: 0.000057 grad: 0.0931 (0.0966) loss: 0.8269 (0.8185) time: 0.1412 data: 0.0575 max mem: 9377 +Train: [55] [2600/6250] eta: 0:09:23 lr: 0.000056 grad: 0.0934 (0.0966) loss: 0.8204 (0.8185) time: 0.1597 data: 0.0759 max mem: 9377 +Train: [55] [2700/6250] eta: 0:09:05 lr: 0.000056 grad: 0.0914 (0.0967) loss: 0.8243 (0.8184) time: 0.1358 data: 0.0524 max mem: 9377 +Train: [55] [2800/6250] eta: 0:08:48 lr: 0.000056 grad: 0.0921 (0.0967) loss: 0.8200 (0.8184) time: 0.1417 data: 0.0595 max mem: 9377 +Train: [55] [2900/6250] eta: 0:08:32 lr: 0.000056 grad: 0.0927 (0.0967) loss: 0.8160 (0.8183) time: 0.1517 data: 0.0661 max mem: 9377 +Train: [55] [3000/6250] eta: 0:08:17 lr: 0.000056 grad: 0.0983 (0.0967) loss: 0.8190 (0.8182) time: 0.1580 data: 0.0769 max mem: 9377 +Train: [55] [3100/6250] eta: 0:08:01 lr: 0.000056 grad: 0.0954 (0.0968) loss: 0.8162 (0.8183) time: 0.1090 data: 0.0218 max mem: 9377 +Train: [55] [3200/6250] eta: 0:07:45 lr: 0.000056 grad: 0.1000 (0.0969) loss: 0.8118 (0.8181) time: 0.1596 data: 0.0808 max mem: 9377 +Train: [55] [3300/6250] eta: 0:07:28 lr: 0.000056 grad: 0.0950 (0.0969) loss: 0.8159 (0.8181) time: 0.1547 data: 0.0699 max mem: 9377 +Train: [55] [3400/6250] eta: 0:07:12 lr: 0.000056 grad: 0.0916 (0.0969) loss: 0.8217 (0.8180) time: 0.1307 data: 0.0455 max mem: 9377 +Train: [55] [3500/6250] eta: 0:06:56 lr: 0.000056 grad: 0.1007 (0.0970) loss: 0.8088 (0.8180) time: 0.1405 data: 0.0530 max mem: 9377 +Train: [55] [3600/6250] eta: 0:06:40 lr: 0.000056 grad: 0.0963 (0.0970) loss: 0.8204 (0.8179) time: 0.1672 data: 0.0887 max mem: 9377 +Train: [55] [3700/6250] eta: 0:06:24 lr: 0.000056 grad: 0.1006 (0.0972) loss: 0.8162 (0.8179) time: 0.1541 data: 0.0737 max mem: 9377 +Train: [55] [3800/6250] eta: 0:06:08 lr: 0.000056 grad: 0.1021 (0.0974) loss: 0.8129 (0.8177) time: 0.1238 data: 0.0403 max mem: 9377 +Train: [55] [3900/6250] eta: 0:05:53 lr: 0.000056 grad: 0.0988 (0.0974) loss: 0.8163 (0.8176) time: 0.1524 data: 0.0714 max mem: 9377 +Train: [55] [4000/6250] eta: 0:05:37 lr: 0.000056 grad: 0.1078 (0.0975) loss: 0.8107 (0.8175) time: 0.1459 data: 0.0608 max mem: 9377 +Train: [55] [4100/6250] eta: 0:05:21 lr: 0.000056 grad: 0.0919 (0.0975) loss: 0.8142 (0.8175) time: 0.1377 data: 0.0589 max mem: 9377 +Train: [55] [4200/6250] eta: 0:05:06 lr: 0.000056 grad: 0.0981 (0.0976) loss: 0.8186 (0.8174) time: 0.1553 data: 0.0742 max mem: 9377 +Train: [55] [4300/6250] eta: 0:04:51 lr: 0.000056 grad: 0.0886 (0.0977) loss: 0.8115 (0.8173) time: 0.1487 data: 0.0656 max mem: 9377 +Train: [55] [4400/6250] eta: 0:04:35 lr: 0.000056 grad: 0.0918 (0.0976) loss: 0.8192 (0.8173) time: 0.1402 data: 0.0584 max mem: 9377 +Train: [55] [4500/6250] eta: 0:04:20 lr: 0.000056 grad: 0.0972 (0.0976) loss: 0.8170 (0.8173) time: 0.1503 data: 0.0715 max mem: 9377 +Train: [55] [4600/6250] eta: 0:04:05 lr: 0.000056 grad: 0.0985 (0.0976) loss: 0.8120 (0.8173) time: 0.1568 data: 0.0774 max mem: 9377 +Train: [55] [4700/6250] eta: 0:03:50 lr: 0.000056 grad: 0.0936 (0.0976) loss: 0.8247 (0.8173) time: 0.1421 data: 0.0566 max mem: 9377 +Train: [55] [4800/6250] eta: 0:03:34 lr: 0.000056 grad: 0.0959 (0.0976) loss: 0.8140 (0.8173) time: 0.1317 data: 0.0537 max mem: 9377 +Train: [55] [4900/6250] eta: 0:03:20 lr: 0.000056 grad: 0.1001 (0.0977) loss: 0.8135 (0.8173) time: 0.1821 data: 0.1033 max mem: 9377 +Train: [55] [5000/6250] eta: 0:03:04 lr: 0.000056 grad: 0.0967 (0.0977) loss: 0.8116 (0.8172) time: 0.1251 data: 0.0387 max mem: 9377 +Train: [55] [5100/6250] eta: 0:02:49 lr: 0.000056 grad: 0.1050 (0.0977) loss: 0.8061 (0.8172) time: 0.1221 data: 0.0472 max mem: 9377 +Train: [55] [5200/6250] eta: 0:02:35 lr: 0.000056 grad: 0.1014 (0.0978) loss: 0.8119 (0.8171) time: 0.1639 data: 0.0828 max mem: 9377 +Train: [55] [5300/6250] eta: 0:02:20 lr: 0.000056 grad: 0.0950 (0.0978) loss: 0.8100 (0.8170) time: 0.1620 data: 0.0800 max mem: 9377 +Train: [55] [5400/6250] eta: 0:02:05 lr: 0.000056 grad: 0.0971 (0.0978) loss: 0.8135 (0.8170) time: 0.1329 data: 0.0499 max mem: 9377 +Train: [55] [5500/6250] eta: 0:01:50 lr: 0.000056 grad: 0.0950 (0.0978) loss: 0.8170 (0.8170) time: 0.1518 data: 0.0716 max mem: 9377 +Train: [55] [5600/6250] eta: 0:01:36 lr: 0.000055 grad: 0.0953 (0.0978) loss: 0.8164 (0.8170) time: 0.1848 data: 0.1018 max mem: 9377 +Train: [55] [5700/6250] eta: 0:01:21 lr: 0.000055 grad: 0.0892 (0.0978) loss: 0.8199 (0.8170) time: 0.1525 data: 0.0665 max mem: 9377 +Train: [55] [5800/6250] eta: 0:01:06 lr: 0.000055 grad: 0.0894 (0.0978) loss: 0.8205 (0.8171) time: 0.1578 data: 0.0813 max mem: 9377 +Train: [55] [5900/6250] eta: 0:00:51 lr: 0.000055 grad: 0.0958 (0.0978) loss: 0.8179 (0.8171) time: 0.1430 data: 0.0598 max mem: 9377 +Train: [55] [6000/6250] eta: 0:00:37 lr: 0.000055 grad: 0.0921 (0.0978) loss: 0.8195 (0.8171) time: 0.1491 data: 0.0715 max mem: 9377 +Train: [55] [6100/6250] eta: 0:00:22 lr: 0.000055 grad: 0.0992 (0.0978) loss: 0.8159 (0.8171) time: 0.1544 data: 0.0750 max mem: 9377 +Train: [55] [6200/6250] eta: 0:00:07 lr: 0.000055 grad: 0.0986 (0.0979) loss: 0.8132 (0.8171) time: 0.1433 data: 0.0609 max mem: 9377 +Train: [55] [6249/6250] eta: 0:00:00 lr: 0.000055 grad: 0.0978 (0.0979) loss: 0.8170 (0.8171) time: 0.1552 data: 0.0664 max mem: 9377 +Train: [55] Total time: 0:15:28 (0.1486 s / it) +Averaged stats: lr: 0.000055 grad: 0.0978 (0.0979) loss: 0.8170 (0.8171) +Eval (hcp-train-subset): [55] [ 0/62] eta: 0:03:24 loss: 0.8305 (0.8305) time: 3.2964 data: 3.2268 max mem: 9377 +Eval (hcp-train-subset): [55] [61/62] eta: 0:00:00 loss: 0.8298 (0.8290) time: 0.1356 data: 0.1105 max mem: 9377 +Eval (hcp-train-subset): [55] Total time: 0:00:12 (0.2068 s / it) +Averaged stats (hcp-train-subset): loss: 0.8298 (0.8290) +Eval (hcp-val): [55] [ 0/62] eta: 0:05:35 loss: 0.8328 (0.8328) time: 5.4187 data: 5.3883 max mem: 9377 +Eval (hcp-val): [55] [61/62] eta: 0:00:00 loss: 0.8328 (0.8350) time: 0.1253 data: 0.1005 max mem: 9377 +Eval (hcp-val): [55] Total time: 0:00:12 (0.2089 s / it) +Averaged stats (hcp-val): loss: 0.8328 (0.8350) +Eval (nsd-val): [55] [ 0/62] eta: 0:04:12 loss: 0.8018 (0.8018) time: 4.0720 data: 4.0006 max mem: 9377 +Eval (nsd-val): [55] [61/62] eta: 0:00:00 loss: 0.8110 (0.8111) time: 0.1226 data: 0.0972 max mem: 9377 +Eval (nsd-val): [55] Total time: 0:00:12 (0.2014 s / it) +Averaged stats (nsd-val): loss: 0.8110 (0.8111) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +Train: [56] [ 0/6250] eta: 6:49:29 lr: 0.000055 grad: 0.0634 (0.0634) loss: 0.8626 (0.8626) time: 3.9311 data: 3.5973 max mem: 9377 +Train: [56] [ 100/6250] eta: 0:19:30 lr: 0.000055 grad: 0.0930 (0.1118) loss: 0.8303 (0.8288) time: 0.1442 data: 0.0479 max mem: 9377 +Train: [56] [ 200/6250] eta: 0:17:01 lr: 0.000055 grad: 0.0930 (0.1079) loss: 0.8246 (0.8255) time: 0.1493 data: 0.0608 max mem: 9377 +Train: [56] [ 300/6250] eta: 0:15:48 lr: 0.000055 grad: 0.0991 (0.1046) loss: 0.8227 (0.8249) time: 0.1428 data: 0.0464 max mem: 9377 +Train: [56] [ 400/6250] eta: 0:14:59 lr: 0.000055 grad: 0.1061 (0.1055) loss: 0.8157 (0.8225) time: 0.1553 data: 0.0672 max mem: 9377 +Train: [56] [ 500/6250] eta: 0:14:23 lr: 0.000055 grad: 0.0931 (0.1057) loss: 0.8145 (0.8207) time: 0.1652 data: 0.0836 max mem: 9377 +Train: [56] [ 600/6250] eta: 0:14:07 lr: 0.000055 grad: 0.0963 (0.1046) loss: 0.8184 (0.8195) time: 0.1532 data: 0.0731 max mem: 9377 +Train: [56] [ 700/6250] eta: 0:13:50 lr: 0.000055 grad: 0.0851 (0.1036) loss: 0.8215 (0.8191) time: 0.1314 data: 0.0461 max mem: 9377 +Train: [56] [ 800/6250] eta: 0:13:47 lr: 0.000055 grad: 0.0952 (0.1026) loss: 0.8189 (0.8191) time: 0.1892 data: 0.1031 max mem: 9377 +Train: [56] [ 900/6250] eta: 0:13:20 lr: 0.000055 grad: 0.0897 (0.1016) loss: 0.8213 (0.8192) time: 0.1260 data: 0.0458 max mem: 9377 +Train: [56] [1000/6250] eta: 0:13:05 lr: 0.000055 grad: 0.0957 (0.1014) loss: 0.8242 (0.8189) time: 0.1359 data: 0.0511 max mem: 9377 +Train: [56] [1100/6250] eta: 0:12:42 lr: 0.000055 grad: 0.0894 (0.1006) loss: 0.8294 (0.8190) time: 0.1345 data: 0.0507 max mem: 9377 +Train: [56] [1200/6250] eta: 0:12:23 lr: 0.000055 grad: 0.0913 (0.1003) loss: 0.8165 (0.8189) time: 0.1381 data: 0.0526 max mem: 9377 +Train: [56] [1300/6250] eta: 0:12:06 lr: 0.000055 grad: 0.0982 (0.1002) loss: 0.8142 (0.8186) time: 0.1413 data: 0.0601 max mem: 9377 +Train: [56] [1400/6250] eta: 0:11:47 lr: 0.000055 grad: 0.0981 (0.1000) loss: 0.8155 (0.8185) time: 0.1472 data: 0.0662 max mem: 9377 +Train: [56] [1500/6250] eta: 0:11:31 lr: 0.000055 grad: 0.0952 (0.1000) loss: 0.8173 (0.8182) time: 0.1454 data: 0.0653 max mem: 9377 +Train: [56] [1600/6250] eta: 0:11:18 lr: 0.000055 grad: 0.0948 (0.0999) loss: 0.8205 (0.8180) time: 0.1467 data: 0.0715 max mem: 9377 +Train: [56] [1700/6250] eta: 0:11:03 lr: 0.000055 grad: 0.1029 (0.0999) loss: 0.8124 (0.8179) time: 0.1496 data: 0.0722 max mem: 9377 +Train: [56] [1800/6250] eta: 0:10:46 lr: 0.000055 grad: 0.0956 (0.0998) loss: 0.8183 (0.8178) time: 0.1401 data: 0.0548 max mem: 9377 +Train: [56] [1900/6250] eta: 0:10:32 lr: 0.000055 grad: 0.1010 (0.0999) loss: 0.8078 (0.8177) time: 0.1249 data: 0.0425 max mem: 9377 +Train: [56] [2000/6250] eta: 0:10:15 lr: 0.000055 grad: 0.0994 (0.0998) loss: 0.8186 (0.8176) time: 0.1186 data: 0.0309 max mem: 9377 +Train: [56] [2100/6250] eta: 0:09:59 lr: 0.000055 grad: 0.0980 (0.0999) loss: 0.8150 (0.8175) time: 0.1437 data: 0.0613 max mem: 9377 +Train: [56] [2200/6250] eta: 0:09:46 lr: 0.000055 grad: 0.1011 (0.1000) loss: 0.8260 (0.8174) time: 0.1670 data: 0.0840 max mem: 9377 +Train: [56] [2300/6250] eta: 0:09:30 lr: 0.000055 grad: 0.1002 (0.1000) loss: 0.8150 (0.8174) time: 0.1283 data: 0.0452 max mem: 9377 +Train: [56] [2400/6250] eta: 0:09:15 lr: 0.000054 grad: 0.0992 (0.1000) loss: 0.8148 (0.8173) time: 0.1438 data: 0.0625 max mem: 9377 +Train: [56] [2500/6250] eta: 0:09:01 lr: 0.000054 grad: 0.1013 (0.1001) loss: 0.8164 (0.8172) time: 0.1291 data: 0.0467 max mem: 9377 +Train: [56] [2600/6250] eta: 0:08:46 lr: 0.000054 grad: 0.1026 (0.1002) loss: 0.8180 (0.8171) time: 0.1552 data: 0.0702 max mem: 9377 +Train: [56] [2700/6250] eta: 0:08:30 lr: 0.000054 grad: 0.0936 (0.1002) loss: 0.8163 (0.8171) time: 0.1448 data: 0.0649 max mem: 9377 +Train: [56] [2800/6250] eta: 0:08:15 lr: 0.000054 grad: 0.1009 (0.1002) loss: 0.8164 (0.8171) time: 0.1393 data: 0.0623 max mem: 9377 +Train: [56] [2900/6250] eta: 0:08:02 lr: 0.000054 grad: 0.1012 (0.1002) loss: 0.8084 (0.8170) time: 0.1602 data: 0.0780 max mem: 9377 +Train: [56] [3000/6250] eta: 0:07:47 lr: 0.000054 grad: 0.1036 (0.1004) loss: 0.8063 (0.8168) time: 0.1454 data: 0.0697 max mem: 9377 +Train: [56] [3100/6250] eta: 0:07:34 lr: 0.000054 grad: 0.1033 (0.1005) loss: 0.8116 (0.8166) time: 0.1477 data: 0.0696 max mem: 9377 +Train: [56] [3200/6250] eta: 0:07:20 lr: 0.000054 grad: 0.0979 (0.1005) loss: 0.8159 (0.8165) time: 0.1380 data: 0.0570 max mem: 9377 +Train: [56] [3300/6250] eta: 0:07:06 lr: 0.000054 grad: 0.1042 (0.1006) loss: 0.8127 (0.8165) time: 0.1534 data: 0.0749 max mem: 9377 +Train: [56] [3400/6250] eta: 0:06:51 lr: 0.000054 grad: 0.0984 (0.1007) loss: 0.8159 (0.8165) time: 0.1439 data: 0.0603 max mem: 9377 +Train: [56] [3500/6250] eta: 0:06:36 lr: 0.000054 grad: 0.0993 (0.1008) loss: 0.8137 (0.8164) time: 0.1385 data: 0.0569 max mem: 9377 +Train: [56] [3600/6250] eta: 0:06:21 lr: 0.000054 grad: 0.0999 (0.1009) loss: 0.8159 (0.8163) time: 0.1350 data: 0.0493 max mem: 9377 +Train: [56] [3700/6250] eta: 0:06:07 lr: 0.000054 grad: 0.0977 (0.1010) loss: 0.8119 (0.8162) time: 0.1334 data: 0.0534 max mem: 9377 +Train: [56] [3800/6250] eta: 0:05:52 lr: 0.000054 grad: 0.1040 (0.1011) loss: 0.8065 (0.8160) time: 0.1514 data: 0.0660 max mem: 9377 +Train: [56] [3900/6250] eta: 0:05:38 lr: 0.000054 grad: 0.1020 (0.1012) loss: 0.8149 (0.8159) time: 0.1554 data: 0.0776 max mem: 9377 +Train: [56] [4000/6250] eta: 0:05:24 lr: 0.000054 grad: 0.0953 (0.1012) loss: 0.8115 (0.8158) time: 0.1573 data: 0.0681 max mem: 9377 +Train: [56] [4100/6250] eta: 0:05:10 lr: 0.000054 grad: 0.0972 (0.1012) loss: 0.8121 (0.8158) time: 0.1619 data: 0.0850 max mem: 9377 +Train: [56] [4200/6250] eta: 0:04:55 lr: 0.000054 grad: 0.0980 (0.1012) loss: 0.8129 (0.8158) time: 0.1362 data: 0.0528 max mem: 9377 +Train: [56] [4300/6250] eta: 0:04:40 lr: 0.000054 grad: 0.1016 (0.1012) loss: 0.8122 (0.8157) time: 0.1377 data: 0.0572 max mem: 9377 +Train: [56] [4400/6250] eta: 0:04:26 lr: 0.000054 grad: 0.0990 (0.1012) loss: 0.8159 (0.8156) time: 0.1427 data: 0.0602 max mem: 9377 +Train: [56] [4500/6250] eta: 0:04:12 lr: 0.000054 grad: 0.0976 (0.1012) loss: 0.8174 (0.8155) time: 0.1669 data: 0.0840 max mem: 9377 +Train: [56] [4600/6250] eta: 0:03:57 lr: 0.000054 grad: 0.0975 (0.1013) loss: 0.8138 (0.8154) time: 0.1325 data: 0.0498 max mem: 9377 +Train: [56] [4700/6250] eta: 0:03:43 lr: 0.000054 grad: 0.0963 (0.1013) loss: 0.8143 (0.8154) time: 0.0994 data: 0.0160 max mem: 9377 +Train: [56] [4800/6250] eta: 0:03:28 lr: 0.000054 grad: 0.0953 (0.1013) loss: 0.8074 (0.8153) time: 0.1418 data: 0.0602 max mem: 9377 +Train: [56] [4900/6250] eta: 0:03:14 lr: 0.000054 grad: 0.1061 (0.1013) loss: 0.8051 (0.8152) time: 0.1395 data: 0.0558 max mem: 9377 +Train: [56] [5000/6250] eta: 0:02:59 lr: 0.000054 grad: 0.0962 (0.1013) loss: 0.8156 (0.8152) time: 0.1456 data: 0.0587 max mem: 9377 +Train: [56] [5100/6250] eta: 0:02:45 lr: 0.000054 grad: 0.0961 (0.1013) loss: 0.8081 (0.8151) time: 0.1661 data: 0.0910 max mem: 9377 +Train: [56] [5200/6250] eta: 0:02:31 lr: 0.000054 grad: 0.1017 (0.1013) loss: 0.8058 (0.8151) time: 0.1329 data: 0.0525 max mem: 9377 +Train: [56] [5300/6250] eta: 0:02:16 lr: 0.000054 grad: 0.0936 (0.1013) loss: 0.8167 (0.8151) time: 0.1514 data: 0.0709 max mem: 9377 +Train: [56] [5400/6250] eta: 0:02:02 lr: 0.000054 grad: 0.1061 (0.1014) loss: 0.8121 (0.8151) time: 0.2114 data: 0.1284 max mem: 9377 +Train: [56] [5500/6250] eta: 0:01:48 lr: 0.000053 grad: 0.1043 (0.1014) loss: 0.8098 (0.8150) time: 0.1491 data: 0.0677 max mem: 9377 +Train: [56] [5600/6250] eta: 0:01:34 lr: 0.000053 grad: 0.0965 (0.1014) loss: 0.8174 (0.8150) time: 0.1935 data: 0.1087 max mem: 9377 +Train: [56] [5700/6250] eta: 0:01:19 lr: 0.000053 grad: 0.0961 (0.1014) loss: 0.8151 (0.8149) time: 0.1507 data: 0.0680 max mem: 9377 +Train: [56] [5800/6250] eta: 0:01:05 lr: 0.000053 grad: 0.0980 (0.1014) loss: 0.8143 (0.8149) time: 0.1554 data: 0.0741 max mem: 9377 +Train: [56] [5900/6250] eta: 0:00:50 lr: 0.000053 grad: 0.1060 (0.1014) loss: 0.8112 (0.8148) time: 0.1314 data: 0.0449 max mem: 9377 +Train: [56] [6000/6250] eta: 0:00:36 lr: 0.000053 grad: 0.1034 (0.1015) loss: 0.8037 (0.8147) time: 0.1423 data: 0.0671 max mem: 9377 +Train: [56] [6100/6250] eta: 0:00:21 lr: 0.000053 grad: 0.1070 (0.1016) loss: 0.8036 (0.8146) time: 0.1454 data: 0.0615 max mem: 9377 +Train: [56] [6200/6250] eta: 0:00:07 lr: 0.000053 grad: 0.1019 (0.1016) loss: 0.8057 (0.8145) time: 0.1290 data: 0.0410 max mem: 9377 +Train: [56] [6249/6250] eta: 0:00:00 lr: 0.000053 grad: 0.1017 (0.1017) loss: 0.8120 (0.8145) time: 0.1518 data: 0.0660 max mem: 9377 +Train: [56] Total time: 0:15:08 (0.1453 s / it) +Averaged stats: lr: 0.000053 grad: 0.1017 (0.1017) loss: 0.8120 (0.8145) +Eval (hcp-train-subset): [56] [ 0/62] eta: 0:05:35 loss: 0.8313 (0.8313) time: 5.4106 data: 5.3805 max mem: 9377 +Eval (hcp-train-subset): [56] [61/62] eta: 0:00:00 loss: 0.8305 (0.8284) time: 0.1349 data: 0.1077 max mem: 9377 +Eval (hcp-train-subset): [56] Total time: 0:00:13 (0.2102 s / it) +Averaged stats (hcp-train-subset): loss: 0.8305 (0.8284) +Eval (hcp-val): [56] [ 0/62] eta: 0:05:41 loss: 0.8365 (0.8365) time: 5.5130 data: 5.4822 max mem: 9377 +Eval (hcp-val): [56] [61/62] eta: 0:00:00 loss: 0.8336 (0.8356) time: 0.1315 data: 0.1064 max mem: 9377 +Eval (hcp-val): [56] Total time: 0:00:13 (0.2150 s / it) +Averaged stats (hcp-val): loss: 0.8336 (0.8356) +Eval (nsd-val): [56] [ 0/62] eta: 0:04:47 loss: 0.8010 (0.8010) time: 4.6333 data: 4.6002 max mem: 9377 +Eval (nsd-val): [56] [61/62] eta: 0:00:00 loss: 0.8120 (0.8109) time: 0.1277 data: 0.1019 max mem: 9377 +Eval (nsd-val): [56] Total time: 0:00:12 (0.1954 s / it) +Averaged stats (nsd-val): loss: 0.8120 (0.8109) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +Train: [57] [ 0/6250] eta: 10:49:55 lr: 0.000053 grad: 0.0847 (0.0847) loss: 0.8547 (0.8547) time: 6.2393 data: 6.1473 max mem: 9377 +Train: [57] [ 100/6250] eta: 0:19:19 lr: 0.000053 grad: 0.0997 (0.1181) loss: 0.8177 (0.8237) time: 0.1422 data: 0.0475 max mem: 9377 +Train: [57] [ 200/6250] eta: 0:16:38 lr: 0.000053 grad: 0.0967 (0.1148) loss: 0.8116 (0.8195) time: 0.1421 data: 0.0542 max mem: 9377 +Train: [57] [ 300/6250] eta: 0:15:26 lr: 0.000053 grad: 0.1025 (0.1162) loss: 0.8161 (0.8173) time: 0.1188 data: 0.0262 max mem: 9377 +Train: [57] [ 400/6250] eta: 0:14:50 lr: 0.000053 grad: 0.1045 (0.1134) loss: 0.8160 (0.8177) time: 0.1610 data: 0.0675 max mem: 9377 +Train: [57] [ 500/6250] eta: 0:14:32 lr: 0.000053 grad: 0.0910 (0.1101) loss: 0.8148 (0.8179) time: 0.1355 data: 0.0463 max mem: 9377 +Train: [57] [ 600/6250] eta: 0:14:12 lr: 0.000053 grad: 0.0889 (0.1080) loss: 0.8247 (0.8182) time: 0.1330 data: 0.0408 max mem: 9377 +Train: [57] [ 700/6250] eta: 0:13:51 lr: 0.000053 grad: 0.0904 (0.1057) loss: 0.8248 (0.8185) time: 0.1366 data: 0.0531 max mem: 9377 +Train: [57] [ 800/6250] eta: 0:13:29 lr: 0.000053 grad: 0.0925 (0.1043) loss: 0.8176 (0.8189) time: 0.1405 data: 0.0532 max mem: 9377 +Train: [57] [ 900/6250] eta: 0:13:12 lr: 0.000053 grad: 0.0959 (0.1032) loss: 0.8169 (0.8192) time: 0.1440 data: 0.0643 max mem: 9377 +Train: [57] [1000/6250] eta: 0:12:50 lr: 0.000053 grad: 0.0872 (0.1024) loss: 0.8241 (0.8194) time: 0.1336 data: 0.0512 max mem: 9377 +Train: [57] [1100/6250] eta: 0:12:31 lr: 0.000053 grad: 0.0947 (0.1016) loss: 0.8234 (0.8196) time: 0.1551 data: 0.0725 max mem: 9377 +Train: [57] [1200/6250] eta: 0:12:10 lr: 0.000053 grad: 0.0943 (0.1010) loss: 0.8163 (0.8195) time: 0.1461 data: 0.0526 max mem: 9377 +Train: [57] [1300/6250] eta: 0:11:49 lr: 0.000053 grad: 0.0964 (0.1009) loss: 0.8142 (0.8193) time: 0.1329 data: 0.0410 max mem: 9377 +Train: [57] [1400/6250] eta: 0:11:34 lr: 0.000053 grad: 0.0971 (0.1007) loss: 0.8216 (0.8191) time: 0.1620 data: 0.0822 max mem: 9377 +Train: [57] [1500/6250] eta: 0:11:18 lr: 0.000053 grad: 0.0935 (0.1008) loss: 0.8174 (0.8189) time: 0.1222 data: 0.0371 max mem: 9377 +Train: [57] [1600/6250] eta: 0:11:03 lr: 0.000053 grad: 0.0892 (0.1007) loss: 0.8174 (0.8186) time: 0.1456 data: 0.0559 max mem: 9377 +Train: [57] [1700/6250] eta: 0:10:49 lr: 0.000053 grad: 0.0998 (0.1007) loss: 0.8201 (0.8184) time: 0.1544 data: 0.0740 max mem: 9377 +Train: [57] [1800/6250] eta: 0:10:37 lr: 0.000053 grad: 0.1029 (0.1005) loss: 0.8108 (0.8183) time: 0.1568 data: 0.0772 max mem: 9377 +Train: [57] [1900/6250] eta: 0:10:26 lr: 0.000053 grad: 0.0990 (0.1003) loss: 0.8169 (0.8182) time: 0.1713 data: 0.0965 max mem: 9377 +Train: [57] [2000/6250] eta: 0:10:13 lr: 0.000053 grad: 0.0957 (0.1002) loss: 0.8202 (0.8181) time: 0.1669 data: 0.0873 max mem: 9377 +Train: [57] [2100/6250] eta: 0:10:03 lr: 0.000053 grad: 0.1042 (0.1002) loss: 0.8097 (0.8179) time: 0.1698 data: 0.0881 max mem: 9377 +Train: [57] [2200/6250] eta: 0:09:49 lr: 0.000053 grad: 0.1044 (0.1002) loss: 0.8104 (0.8177) time: 0.1533 data: 0.0760 max mem: 9377 +Train: [57] [2300/6250] eta: 0:09:35 lr: 0.000052 grad: 0.0949 (0.1001) loss: 0.8220 (0.8176) time: 0.1420 data: 0.0557 max mem: 9377 +Train: [57] [2400/6250] eta: 0:09:21 lr: 0.000052 grad: 0.0957 (0.1002) loss: 0.8207 (0.8175) time: 0.1545 data: 0.0725 max mem: 9377 +Train: [57] [2500/6250] eta: 0:09:06 lr: 0.000052 grad: 0.1016 (0.1001) loss: 0.8106 (0.8174) time: 0.1452 data: 0.0633 max mem: 9377 +Train: [57] [2600/6250] eta: 0:08:51 lr: 0.000052 grad: 0.1073 (0.1004) loss: 0.8101 (0.8173) time: 0.1374 data: 0.0554 max mem: 9377 +Train: [57] [2700/6250] eta: 0:08:36 lr: 0.000052 grad: 0.1008 (0.1006) loss: 0.8153 (0.8172) time: 0.1326 data: 0.0528 max mem: 9377 +Train: [57] [2800/6250] eta: 0:08:20 lr: 0.000052 grad: 0.1041 (0.1007) loss: 0.8058 (0.8170) time: 0.1354 data: 0.0545 max mem: 9377 +Train: [57] [2900/6250] eta: 0:08:05 lr: 0.000052 grad: 0.1032 (0.1008) loss: 0.8079 (0.8168) time: 0.1391 data: 0.0561 max mem: 9377 +Train: [57] [3000/6250] eta: 0:07:50 lr: 0.000052 grad: 0.1001 (0.1009) loss: 0.8097 (0.8164) time: 0.1545 data: 0.0751 max mem: 9377 +Train: [57] [3100/6250] eta: 0:07:34 lr: 0.000052 grad: 0.1022 (0.1010) loss: 0.8119 (0.8163) time: 0.1268 data: 0.0459 max mem: 9377 +Train: [57] [3200/6250] eta: 0:07:19 lr: 0.000052 grad: 0.1057 (0.1011) loss: 0.8162 (0.8161) time: 0.1323 data: 0.0464 max mem: 9377 +Train: [57] [3300/6250] eta: 0:07:04 lr: 0.000052 grad: 0.1014 (0.1013) loss: 0.8117 (0.8160) time: 0.1451 data: 0.0640 max mem: 9377 +Train: [57] [3400/6250] eta: 0:06:49 lr: 0.000052 grad: 0.1025 (0.1013) loss: 0.8118 (0.8159) time: 0.1219 data: 0.0401 max mem: 9377 +Train: [57] [3500/6250] eta: 0:06:34 lr: 0.000052 grad: 0.1086 (0.1014) loss: 0.8171 (0.8159) time: 0.1310 data: 0.0527 max mem: 9377 +Train: [57] [3600/6250] eta: 0:06:19 lr: 0.000052 grad: 0.0998 (0.1013) loss: 0.8110 (0.8159) time: 0.1086 data: 0.0297 max mem: 9377 +Train: [57] [3700/6250] eta: 0:06:05 lr: 0.000052 grad: 0.1055 (0.1013) loss: 0.8200 (0.8159) time: 0.1630 data: 0.0847 max mem: 9377 +Train: [57] [3800/6250] eta: 0:05:51 lr: 0.000052 grad: 0.0984 (0.1014) loss: 0.8094 (0.8159) time: 0.1277 data: 0.0472 max mem: 9377 +Train: [57] [3900/6250] eta: 0:05:37 lr: 0.000052 grad: 0.0964 (0.1015) loss: 0.8204 (0.8159) time: 0.1699 data: 0.0902 max mem: 9377 +Train: [57] [4000/6250] eta: 0:05:23 lr: 0.000052 grad: 0.1114 (0.1018) loss: 0.8095 (0.8159) time: 0.1440 data: 0.0627 max mem: 9377 +Train: [57] [4100/6250] eta: 0:05:09 lr: 0.000052 grad: 0.1027 (0.1018) loss: 0.8176 (0.8159) time: 0.1523 data: 0.0670 max mem: 9377 +Train: [57] [4200/6250] eta: 0:04:55 lr: 0.000052 grad: 0.0969 (0.1019) loss: 0.8135 (0.8158) time: 0.1592 data: 0.0823 max mem: 9377 +Train: [57] [4300/6250] eta: 0:04:40 lr: 0.000052 grad: 0.1043 (0.1019) loss: 0.8123 (0.8158) time: 0.1426 data: 0.0592 max mem: 9377 +Train: [57] [4400/6250] eta: 0:04:26 lr: 0.000052 grad: 0.1080 (0.1020) loss: 0.8132 (0.8157) time: 0.1360 data: 0.0552 max mem: 9377 +Train: [57] [4500/6250] eta: 0:04:11 lr: 0.000052 grad: 0.1078 (0.1021) loss: 0.8079 (0.8157) time: 0.1198 data: 0.0362 max mem: 9377 +Train: [57] [4600/6250] eta: 0:03:57 lr: 0.000052 grad: 0.0933 (0.1021) loss: 0.8182 (0.8157) time: 0.1661 data: 0.0885 max mem: 9377 +Train: [57] [4700/6250] eta: 0:03:42 lr: 0.000052 grad: 0.1001 (0.1022) loss: 0.8141 (0.8157) time: 0.1599 data: 0.0796 max mem: 9377 +Train: [57] [4800/6250] eta: 0:03:28 lr: 0.000052 grad: 0.1054 (0.1023) loss: 0.8090 (0.8156) time: 0.1641 data: 0.0806 max mem: 9377 +Train: [57] [4900/6250] eta: 0:03:14 lr: 0.000052 grad: 0.1024 (0.1023) loss: 0.8145 (0.8156) time: 0.1562 data: 0.0758 max mem: 9377 +Train: [57] [5000/6250] eta: 0:02:59 lr: 0.000052 grad: 0.1010 (0.1024) loss: 0.8058 (0.8155) time: 0.1406 data: 0.0480 max mem: 9377 +Train: [57] [5100/6250] eta: 0:02:45 lr: 0.000052 grad: 0.1011 (0.1024) loss: 0.8103 (0.8154) time: 0.1402 data: 0.0590 max mem: 9377 +Train: [57] [5200/6250] eta: 0:02:30 lr: 0.000052 grad: 0.0964 (0.1024) loss: 0.8226 (0.8154) time: 0.1247 data: 0.0379 max mem: 9377 +Train: [57] [5300/6250] eta: 0:02:16 lr: 0.000052 grad: 0.0996 (0.1023) loss: 0.8100 (0.8154) time: 0.1707 data: 0.0921 max mem: 9377 +Train: [57] [5400/6250] eta: 0:02:02 lr: 0.000051 grad: 0.1033 (0.1024) loss: 0.8178 (0.8153) time: 0.1340 data: 0.0529 max mem: 9377 +Train: [57] [5500/6250] eta: 0:01:48 lr: 0.000051 grad: 0.0948 (0.1024) loss: 0.8161 (0.8153) time: 0.1550 data: 0.0689 max mem: 9377 +Train: [57] [5600/6250] eta: 0:01:33 lr: 0.000051 grad: 0.1064 (0.1024) loss: 0.8132 (0.8153) time: 0.1617 data: 0.0834 max mem: 9377 +Train: [57] [5700/6250] eta: 0:01:19 lr: 0.000051 grad: 0.1089 (0.1025) loss: 0.8115 (0.8153) time: 0.1660 data: 0.0855 max mem: 9377 +Train: [57] [5800/6250] eta: 0:01:05 lr: 0.000051 grad: 0.1057 (0.1025) loss: 0.8150 (0.8153) time: 0.1426 data: 0.0566 max mem: 9377 +Train: [57] [5900/6250] eta: 0:00:50 lr: 0.000051 grad: 0.0945 (0.1026) loss: 0.8184 (0.8152) time: 0.1591 data: 0.0713 max mem: 9377 +Train: [57] [6000/6250] eta: 0:00:36 lr: 0.000051 grad: 0.0988 (0.1026) loss: 0.8195 (0.8152) time: 0.1470 data: 0.0627 max mem: 9377 +Train: [57] [6100/6250] eta: 0:00:21 lr: 0.000051 grad: 0.1024 (0.1026) loss: 0.8105 (0.8152) time: 0.1304 data: 0.0404 max mem: 9377 +Train: [57] [6200/6250] eta: 0:00:07 lr: 0.000051 grad: 0.1021 (0.1027) loss: 0.8094 (0.8152) time: 0.1277 data: 0.0479 max mem: 9377 +Train: [57] [6249/6250] eta: 0:00:00 lr: 0.000051 grad: 0.1032 (0.1027) loss: 0.8102 (0.8152) time: 0.1678 data: 0.0861 max mem: 9377 +Train: [57] Total time: 0:15:10 (0.1456 s / it) +Averaged stats: lr: 0.000051 grad: 0.1032 (0.1027) loss: 0.8102 (0.8152) +Eval (hcp-train-subset): [57] [ 0/62] eta: 0:03:42 loss: 0.8322 (0.8322) time: 3.5809 data: 3.4988 max mem: 9377 +Eval (hcp-train-subset): [57] [61/62] eta: 0:00:00 loss: 0.8301 (0.8287) time: 0.1280 data: 0.1032 max mem: 9377 +Eval (hcp-train-subset): [57] Total time: 0:00:14 (0.2262 s / it) +Averaged stats (hcp-train-subset): loss: 0.8301 (0.8287) +Eval (hcp-val): [57] [ 0/62] eta: 0:04:03 loss: 0.8293 (0.8293) time: 3.9216 data: 3.8412 max mem: 9377 +Eval (hcp-val): [57] [61/62] eta: 0:00:00 loss: 0.8323 (0.8345) time: 0.1042 data: 0.0772 max mem: 9377 +Eval (hcp-val): [57] Total time: 0:00:13 (0.2127 s / it) +Averaged stats (hcp-val): loss: 0.8323 (0.8345) +Eval (nsd-val): [57] [ 0/62] eta: 0:04:46 loss: 0.7981 (0.7981) time: 4.6264 data: 4.5961 max mem: 9377 +Eval (nsd-val): [57] [61/62] eta: 0:00:00 loss: 0.8079 (0.8103) time: 0.1229 data: 0.0980 max mem: 9377 +Eval (nsd-val): [57] Total time: 0:00:12 (0.2029 s / it) +Averaged stats (nsd-val): loss: 0.8079 (0.8103) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +Train: [58] [ 0/6250] eta: 8:31:51 lr: 0.000051 grad: 0.1164 (0.1164) loss: 0.8593 (0.8593) time: 4.9138 data: 4.5719 max mem: 9377 +Train: [58] [ 100/6250] eta: 0:19:20 lr: 0.000051 grad: 0.1011 (0.1314) loss: 0.8161 (0.8185) time: 0.1358 data: 0.0288 max mem: 9377 +Train: [58] [ 200/6250] eta: 0:16:47 lr: 0.000051 grad: 0.0974 (0.1173) loss: 0.8159 (0.8169) time: 0.1529 data: 0.0520 max mem: 9377 +Train: [58] [ 300/6250] eta: 0:15:35 lr: 0.000051 grad: 0.1022 (0.1133) loss: 0.8077 (0.8163) time: 0.1255 data: 0.0287 max mem: 9377 +Train: [58] [ 400/6250] eta: 0:15:08 lr: 0.000051 grad: 0.0947 (0.1104) loss: 0.8104 (0.8160) time: 0.1522 data: 0.0596 max mem: 9377 +Train: [58] [ 500/6250] eta: 0:14:39 lr: 0.000051 grad: 0.0968 (0.1090) loss: 0.8118 (0.8151) time: 0.1185 data: 0.0325 max mem: 9377 +Train: [58] [ 600/6250] eta: 0:14:18 lr: 0.000051 grad: 0.0988 (0.1080) loss: 0.8106 (0.8149) time: 0.1196 data: 0.0365 max mem: 9377 +Train: [58] [ 700/6250] eta: 0:13:55 lr: 0.000051 grad: 0.0964 (0.1068) loss: 0.8108 (0.8146) time: 0.1359 data: 0.0418 max mem: 9377 +Train: [58] [ 800/6250] eta: 0:13:27 lr: 0.000051 grad: 0.0994 (0.1064) loss: 0.8159 (0.8143) time: 0.1342 data: 0.0437 max mem: 9377 +Train: [58] [ 900/6250] eta: 0:13:10 lr: 0.000051 grad: 0.0982 (0.1059) loss: 0.8126 (0.8141) time: 0.1369 data: 0.0461 max mem: 9377 +Train: [58] [1000/6250] eta: 0:12:46 lr: 0.000051 grad: 0.1012 (0.1056) loss: 0.8188 (0.8141) time: 0.1329 data: 0.0477 max mem: 9377 +Train: [58] [1100/6250] eta: 0:12:23 lr: 0.000051 grad: 0.1064 (0.1054) loss: 0.8107 (0.8140) time: 0.1086 data: 0.0177 max mem: 9377 +Train: [58] [1200/6250] eta: 0:12:04 lr: 0.000051 grad: 0.1045 (0.1053) loss: 0.8050 (0.8137) time: 0.1411 data: 0.0569 max mem: 9377 +Train: [58] [1300/6250] eta: 0:11:45 lr: 0.000051 grad: 0.0978 (0.1050) loss: 0.8134 (0.8138) time: 0.1215 data: 0.0362 max mem: 9377 +Train: [58] [1400/6250] eta: 0:11:29 lr: 0.000051 grad: 0.1036 (0.1049) loss: 0.8105 (0.8134) time: 0.1149 data: 0.0271 max mem: 9377 +Train: [58] [1500/6250] eta: 0:11:14 lr: 0.000051 grad: 0.1014 (0.1049) loss: 0.8089 (0.8132) time: 0.1430 data: 0.0566 max mem: 9377 +Train: [58] [1600/6250] eta: 0:11:00 lr: 0.000051 grad: 0.1000 (0.1048) loss: 0.8132 (0.8131) time: 0.1395 data: 0.0593 max mem: 9377 +Train: [58] [1700/6250] eta: 0:10:44 lr: 0.000051 grad: 0.0955 (0.1046) loss: 0.8163 (0.8132) time: 0.1226 data: 0.0360 max mem: 9377 +Train: [58] [1800/6250] eta: 0:10:29 lr: 0.000051 grad: 0.1048 (0.1046) loss: 0.8148 (0.8133) time: 0.1182 data: 0.0303 max mem: 9377 +Train: [58] [1900/6250] eta: 0:10:15 lr: 0.000051 grad: 0.0978 (0.1045) loss: 0.8161 (0.8132) time: 0.1333 data: 0.0516 max mem: 9377 +Train: [58] [2000/6250] eta: 0:10:01 lr: 0.000051 grad: 0.0947 (0.1044) loss: 0.8141 (0.8132) time: 0.1325 data: 0.0481 max mem: 9377 +Train: [58] [2100/6250] eta: 0:09:49 lr: 0.000051 grad: 0.0978 (0.1043) loss: 0.8197 (0.8132) time: 0.1434 data: 0.0589 max mem: 9377 +Train: [58] [2200/6250] eta: 0:09:36 lr: 0.000050 grad: 0.1018 (0.1042) loss: 0.8132 (0.8132) time: 0.1523 data: 0.0694 max mem: 9377 +Train: [58] [2300/6250] eta: 0:09:23 lr: 0.000050 grad: 0.0948 (0.1042) loss: 0.8176 (0.8134) time: 0.1497 data: 0.0635 max mem: 9377 +Train: [58] [2400/6250] eta: 0:09:09 lr: 0.000050 grad: 0.0964 (0.1042) loss: 0.8108 (0.8134) time: 0.1404 data: 0.0548 max mem: 9377 +Train: [58] [2500/6250] eta: 0:08:54 lr: 0.000050 grad: 0.1042 (0.1043) loss: 0.8161 (0.8135) time: 0.1186 data: 0.0323 max mem: 9377 +Train: [58] [2600/6250] eta: 0:08:39 lr: 0.000050 grad: 0.0944 (0.1044) loss: 0.8148 (0.8135) time: 0.1456 data: 0.0670 max mem: 9377 +Train: [58] [2700/6250] eta: 0:08:24 lr: 0.000050 grad: 0.0998 (0.1043) loss: 0.8157 (0.8136) time: 0.1289 data: 0.0440 max mem: 9377 +Train: [58] [2800/6250] eta: 0:08:10 lr: 0.000050 grad: 0.1045 (0.1043) loss: 0.8035 (0.8136) time: 0.1539 data: 0.0803 max mem: 9377 +Train: [58] [2900/6250] eta: 0:07:56 lr: 0.000050 grad: 0.1008 (0.1043) loss: 0.8127 (0.8135) time: 0.1292 data: 0.0469 max mem: 9377 +Train: [58] [3000/6250] eta: 0:07:42 lr: 0.000050 grad: 0.1027 (0.1043) loss: 0.8159 (0.8136) time: 0.1441 data: 0.0551 max mem: 9377 +Train: [58] [3100/6250] eta: 0:07:29 lr: 0.000050 grad: 0.1061 (0.1044) loss: 0.8114 (0.8136) time: 0.1746 data: 0.0900 max mem: 9377 +Train: [58] [3200/6250] eta: 0:07:15 lr: 0.000050 grad: 0.1024 (0.1045) loss: 0.8087 (0.8135) time: 0.1565 data: 0.0742 max mem: 9377 +Train: [58] [3300/6250] eta: 0:07:01 lr: 0.000050 grad: 0.1072 (0.1046) loss: 0.8104 (0.8135) time: 0.1226 data: 0.0399 max mem: 9377 +Train: [58] [3400/6250] eta: 0:06:47 lr: 0.000050 grad: 0.1030 (0.1047) loss: 0.8143 (0.8134) time: 0.1452 data: 0.0634 max mem: 9377 +Train: [58] [3500/6250] eta: 0:06:33 lr: 0.000050 grad: 0.1067 (0.1047) loss: 0.8090 (0.8133) time: 0.1226 data: 0.0347 max mem: 9377 +Train: [58] [3600/6250] eta: 0:06:19 lr: 0.000050 grad: 0.1024 (0.1047) loss: 0.8122 (0.8133) time: 0.1409 data: 0.0623 max mem: 9377 +Train: [58] [3700/6250] eta: 0:06:04 lr: 0.000050 grad: 0.1037 (0.1046) loss: 0.8233 (0.8133) time: 0.1505 data: 0.0685 max mem: 9377 +Train: [58] [3800/6250] eta: 0:05:49 lr: 0.000050 grad: 0.1025 (0.1046) loss: 0.8150 (0.8133) time: 0.1329 data: 0.0376 max mem: 9377 +Train: [58] [3900/6250] eta: 0:05:35 lr: 0.000050 grad: 0.1115 (0.1047) loss: 0.8115 (0.8133) time: 0.1392 data: 0.0588 max mem: 9377 +Train: [58] [4000/6250] eta: 0:05:21 lr: 0.000050 grad: 0.1055 (0.1047) loss: 0.8117 (0.8133) time: 0.1508 data: 0.0696 max mem: 9377 +Train: [58] [4100/6250] eta: 0:05:06 lr: 0.000050 grad: 0.1006 (0.1047) loss: 0.8175 (0.8134) time: 0.1533 data: 0.0754 max mem: 9377 +Train: [58] [4200/6250] eta: 0:04:52 lr: 0.000050 grad: 0.1000 (0.1047) loss: 0.8166 (0.8135) time: 0.1606 data: 0.0763 max mem: 9377 +Train: [58] [4300/6250] eta: 0:04:38 lr: 0.000050 grad: 0.1008 (0.1046) loss: 0.8135 (0.8135) time: 0.1685 data: 0.0831 max mem: 9377 +Train: [58] [4400/6250] eta: 0:04:24 lr: 0.000050 grad: 0.0990 (0.1046) loss: 0.8160 (0.8135) time: 0.1525 data: 0.0758 max mem: 9377 +Train: [58] [4500/6250] eta: 0:04:10 lr: 0.000050 grad: 0.1018 (0.1045) loss: 0.8119 (0.8135) time: 0.1536 data: 0.0671 max mem: 9377 +Train: [58] [4600/6250] eta: 0:03:56 lr: 0.000050 grad: 0.1062 (0.1046) loss: 0.8091 (0.8135) time: 0.1451 data: 0.0647 max mem: 9377 +Train: [58] [4700/6250] eta: 0:03:42 lr: 0.000050 grad: 0.1027 (0.1046) loss: 0.8083 (0.8136) time: 0.1478 data: 0.0647 max mem: 9377 +Train: [58] [4800/6250] eta: 0:03:28 lr: 0.000050 grad: 0.0992 (0.1045) loss: 0.8140 (0.8135) time: 0.1549 data: 0.0722 max mem: 9377 +Train: [58] [4900/6250] eta: 0:03:14 lr: 0.000050 grad: 0.0984 (0.1045) loss: 0.8184 (0.8135) time: 0.1641 data: 0.0897 max mem: 9377 +Train: [58] [5000/6250] eta: 0:03:00 lr: 0.000050 grad: 0.0953 (0.1045) loss: 0.8197 (0.8136) time: 0.1448 data: 0.0654 max mem: 9377 +Train: [58] [5100/6250] eta: 0:02:45 lr: 0.000050 grad: 0.1043 (0.1045) loss: 0.8047 (0.8135) time: 0.1364 data: 0.0532 max mem: 9377 +Train: [58] [5200/6250] eta: 0:02:32 lr: 0.000050 grad: 0.1033 (0.1046) loss: 0.8150 (0.8135) time: 0.1753 data: 0.0929 max mem: 9377 +Train: [58] [5300/6250] eta: 0:02:18 lr: 0.000049 grad: 0.0944 (0.1045) loss: 0.8180 (0.8135) time: 0.1458 data: 0.0649 max mem: 9377 +Train: [58] [5400/6250] eta: 0:02:03 lr: 0.000049 grad: 0.0959 (0.1044) loss: 0.8194 (0.8136) time: 0.1423 data: 0.0638 max mem: 9377 +Train: [58] [5500/6250] eta: 0:01:49 lr: 0.000049 grad: 0.1005 (0.1043) loss: 0.8139 (0.8136) time: 0.1795 data: 0.0944 max mem: 9377 +Train: [58] [5600/6250] eta: 0:01:35 lr: 0.000049 grad: 0.0991 (0.1042) loss: 0.8195 (0.8137) time: 0.1664 data: 0.0867 max mem: 9377 +Train: [58] [5700/6250] eta: 0:01:20 lr: 0.000049 grad: 0.0968 (0.1042) loss: 0.8173 (0.8138) time: 0.1506 data: 0.0625 max mem: 9377 +Train: [58] [5800/6250] eta: 0:01:06 lr: 0.000049 grad: 0.1039 (0.1041) loss: 0.8145 (0.8139) time: 0.1398 data: 0.0528 max mem: 9377 +Train: [58] [5900/6250] eta: 0:00:51 lr: 0.000049 grad: 0.1011 (0.1040) loss: 0.8179 (0.8139) time: 0.1524 data: 0.0624 max mem: 9377 +Train: [58] [6000/6250] eta: 0:00:36 lr: 0.000049 grad: 0.0994 (0.1040) loss: 0.8160 (0.8139) time: 0.1366 data: 0.0577 max mem: 9377 +Train: [58] [6100/6250] eta: 0:00:22 lr: 0.000049 grad: 0.0950 (0.1039) loss: 0.8119 (0.8140) time: 0.1405 data: 0.0583 max mem: 9377 +Train: [58] [6200/6250] eta: 0:00:07 lr: 0.000049 grad: 0.0937 (0.1038) loss: 0.8148 (0.8140) time: 0.1420 data: 0.0611 max mem: 9377 +Train: [58] [6249/6250] eta: 0:00:00 lr: 0.000049 grad: 0.1054 (0.1038) loss: 0.8097 (0.8140) time: 0.1479 data: 0.0633 max mem: 9377 +Train: [58] Total time: 0:15:23 (0.1477 s / it) +Averaged stats: lr: 0.000049 grad: 0.1054 (0.1038) loss: 0.8097 (0.8140) +Eval (hcp-train-subset): [58] [ 0/62] eta: 0:06:16 loss: 0.8325 (0.8325) time: 6.0688 data: 6.0345 max mem: 9377 +Eval (hcp-train-subset): [58] [61/62] eta: 0:00:00 loss: 0.8287 (0.8291) time: 0.1421 data: 0.1169 max mem: 9377 +Eval (hcp-train-subset): [58] Total time: 0:00:14 (0.2324 s / it) +Averaged stats (hcp-train-subset): loss: 0.8287 (0.8291) +Eval (hcp-val): [58] [ 0/62] eta: 0:03:39 loss: 0.8315 (0.8315) time: 3.5433 data: 3.4582 max mem: 9377 +Eval (hcp-val): [58] [61/62] eta: 0:00:00 loss: 0.8335 (0.8347) time: 0.1105 data: 0.0849 max mem: 9377 +Eval (hcp-val): [58] Total time: 0:00:13 (0.2150 s / it) +Averaged stats (hcp-val): loss: 0.8335 (0.8347) +Eval (nsd-val): [58] [ 0/62] eta: 0:04:47 loss: 0.7982 (0.7982) time: 4.6328 data: 4.6017 max mem: 9377 +Eval (nsd-val): [58] [61/62] eta: 0:00:00 loss: 0.8099 (0.8119) time: 0.1099 data: 0.0846 max mem: 9377 +Eval (nsd-val): [58] Total time: 0:00:13 (0.2123 s / it) +Averaged stats (nsd-val): loss: 0.8099 (0.8119) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +Train: [59] [ 0/6250] eta: 10:02:23 lr: 0.000049 grad: 0.0869 (0.0869) loss: 0.8484 (0.8484) time: 5.7829 data: 5.6441 max mem: 9377 +Train: [59] [ 100/6250] eta: 0:20:21 lr: 0.000049 grad: 0.1236 (0.1272) loss: 0.8034 (0.8228) time: 0.1529 data: 0.0508 max mem: 9377 +Train: [59] [ 200/6250] eta: 0:17:59 lr: 0.000049 grad: 0.1001 (0.1184) loss: 0.8127 (0.8189) time: 0.1841 data: 0.0814 max mem: 9377 +Train: [59] [ 300/6250] eta: 0:16:36 lr: 0.000049 grad: 0.1026 (0.1172) loss: 0.8164 (0.8169) time: 0.1574 data: 0.0659 max mem: 9377 +Train: [59] [ 400/6250] eta: 0:16:05 lr: 0.000049 grad: 0.0987 (0.1138) loss: 0.8173 (0.8167) time: 0.1367 data: 0.0447 max mem: 9377 +Train: [59] [ 500/6250] eta: 0:15:27 lr: 0.000049 grad: 0.0994 (0.1114) loss: 0.8080 (0.8161) time: 0.1443 data: 0.0496 max mem: 9377 +Train: [59] [ 600/6250] eta: 0:14:54 lr: 0.000049 grad: 0.0999 (0.1094) loss: 0.8140 (0.8157) time: 0.1342 data: 0.0451 max mem: 9377 +Train: [59] [ 700/6250] eta: 0:14:25 lr: 0.000049 grad: 0.0958 (0.1080) loss: 0.8145 (0.8155) time: 0.1160 data: 0.0179 max mem: 9377 +Train: [59] [ 800/6250] eta: 0:14:13 lr: 0.000049 grad: 0.1024 (0.1072) loss: 0.8152 (0.8155) time: 0.1344 data: 0.0500 max mem: 9377 +Train: [59] [ 900/6250] eta: 0:13:51 lr: 0.000049 grad: 0.0893 (0.1062) loss: 0.8230 (0.8156) time: 0.1508 data: 0.0593 max mem: 9377 +Train: [59] [1000/6250] eta: 0:13:24 lr: 0.000049 grad: 0.0934 (0.1054) loss: 0.8188 (0.8157) time: 0.1233 data: 0.0367 max mem: 9377 +Train: [59] [1100/6250] eta: 0:12:59 lr: 0.000049 grad: 0.0957 (0.1049) loss: 0.8074 (0.8155) time: 0.1126 data: 0.0292 max mem: 9377 +Train: [59] [1200/6250] eta: 0:12:36 lr: 0.000049 grad: 0.1021 (0.1045) loss: 0.8137 (0.8153) time: 0.1381 data: 0.0483 max mem: 9377 +Train: [59] [1300/6250] eta: 0:12:19 lr: 0.000049 grad: 0.0961 (0.1042) loss: 0.8139 (0.8152) time: 0.1395 data: 0.0590 max mem: 9377 +Train: [59] [1400/6250] eta: 0:12:03 lr: 0.000049 grad: 0.0941 (0.1037) loss: 0.8135 (0.8149) time: 0.1223 data: 0.0370 max mem: 9377 +Train: [59] [1500/6250] eta: 0:11:49 lr: 0.000049 grad: 0.0948 (0.1035) loss: 0.8150 (0.8148) time: 0.1559 data: 0.0745 max mem: 9377 +Train: [59] [1600/6250] eta: 0:11:35 lr: 0.000049 grad: 0.0969 (0.1032) loss: 0.8228 (0.8149) time: 0.1221 data: 0.0429 max mem: 9377 +Train: [59] [1700/6250] eta: 0:11:23 lr: 0.000049 grad: 0.0983 (0.1031) loss: 0.8121 (0.8147) time: 0.1688 data: 0.0954 max mem: 9377 +Train: [59] [1800/6250] eta: 0:11:10 lr: 0.000049 grad: 0.1009 (0.1029) loss: 0.8086 (0.8146) time: 0.1623 data: 0.0802 max mem: 9377 +Train: [59] [1900/6250] eta: 0:10:57 lr: 0.000049 grad: 0.0975 (0.1028) loss: 0.8167 (0.8146) time: 0.1537 data: 0.0719 max mem: 9377 +Train: [59] [2000/6250] eta: 0:10:43 lr: 0.000049 grad: 0.0948 (0.1027) loss: 0.8148 (0.8145) time: 0.1506 data: 0.0604 max mem: 9377 +Train: [59] [2100/6250] eta: 0:10:30 lr: 0.000048 grad: 0.1021 (0.1027) loss: 0.8097 (0.8145) time: 0.1546 data: 0.0742 max mem: 9377 +Train: [59] [2200/6250] eta: 0:10:16 lr: 0.000048 grad: 0.0984 (0.1027) loss: 0.8122 (0.8145) time: 0.1535 data: 0.0735 max mem: 9377 +Train: [59] [2300/6250] eta: 0:10:02 lr: 0.000048 grad: 0.0928 (0.1028) loss: 0.8190 (0.8145) time: 0.1818 data: 0.0998 max mem: 9377 +Train: [59] [2400/6250] eta: 0:09:45 lr: 0.000048 grad: 0.0993 (0.1027) loss: 0.8174 (0.8146) time: 0.1361 data: 0.0584 max mem: 9377 +Train: [59] [2500/6250] eta: 0:09:30 lr: 0.000048 grad: 0.0985 (0.1027) loss: 0.8167 (0.8146) time: 0.1440 data: 0.0605 max mem: 9377 +Train: [59] [2600/6250] eta: 0:09:14 lr: 0.000048 grad: 0.1002 (0.1028) loss: 0.8210 (0.8146) time: 0.1380 data: 0.0568 max mem: 9377 +Train: [59] [2700/6250] eta: 0:08:57 lr: 0.000048 grad: 0.0990 (0.1027) loss: 0.8134 (0.8147) time: 0.1468 data: 0.0680 max mem: 9377 +Train: [59] [2800/6250] eta: 0:08:41 lr: 0.000048 grad: 0.0981 (0.1027) loss: 0.8158 (0.8147) time: 0.1438 data: 0.0595 max mem: 9377 +Train: [59] [2900/6250] eta: 0:08:24 lr: 0.000048 grad: 0.0918 (0.1027) loss: 0.8180 (0.8147) time: 0.1362 data: 0.0503 max mem: 9377 +Train: [59] [3000/6250] eta: 0:08:09 lr: 0.000048 grad: 0.0963 (0.1025) loss: 0.8127 (0.8146) time: 0.1560 data: 0.0725 max mem: 9377 +Train: [59] [3100/6250] eta: 0:07:52 lr: 0.000048 grad: 0.0988 (0.1025) loss: 0.8180 (0.8147) time: 0.1222 data: 0.0388 max mem: 9377 +Train: [59] [3200/6250] eta: 0:07:36 lr: 0.000048 grad: 0.0941 (0.1024) loss: 0.8129 (0.8147) time: 0.1372 data: 0.0560 max mem: 9377 +Train: [59] [3300/6250] eta: 0:07:20 lr: 0.000048 grad: 0.0999 (0.1024) loss: 0.8151 (0.8147) time: 0.1368 data: 0.0491 max mem: 9377 +Train: [59] [3400/6250] eta: 0:07:04 lr: 0.000048 grad: 0.0986 (0.1024) loss: 0.8109 (0.8146) time: 0.1481 data: 0.0687 max mem: 9377 +Train: [59] [3500/6250] eta: 0:06:49 lr: 0.000048 grad: 0.1048 (0.1024) loss: 0.8070 (0.8145) time: 0.1520 data: 0.0715 max mem: 9377 +Train: [59] [3600/6250] eta: 0:06:34 lr: 0.000048 grad: 0.0969 (0.1025) loss: 0.8097 (0.8145) time: 0.1515 data: 0.0603 max mem: 9377 +Train: [59] [3700/6250] eta: 0:06:21 lr: 0.000048 grad: 0.0994 (0.1025) loss: 0.8180 (0.8144) time: 0.2387 data: 0.1622 max mem: 9377 +Train: [59] [3800/6250] eta: 0:06:05 lr: 0.000048 grad: 0.1020 (0.1026) loss: 0.8110 (0.8145) time: 0.1663 data: 0.0848 max mem: 9377 +Train: [59] [3900/6250] eta: 0:05:50 lr: 0.000048 grad: 0.1032 (0.1026) loss: 0.8171 (0.8145) time: 0.1401 data: 0.0564 max mem: 9377 +Train: [59] [4000/6250] eta: 0:05:35 lr: 0.000048 grad: 0.0992 (0.1027) loss: 0.8113 (0.8144) time: 0.1456 data: 0.0630 max mem: 9377 +Train: [59] [4100/6250] eta: 0:05:19 lr: 0.000048 grad: 0.1053 (0.1027) loss: 0.8064 (0.8144) time: 0.1503 data: 0.0748 max mem: 9377 +Train: [59] [4200/6250] eta: 0:05:04 lr: 0.000048 grad: 0.1071 (0.1028) loss: 0.8145 (0.8143) time: 0.1555 data: 0.0775 max mem: 9377 +Train: [59] [4300/6250] eta: 0:04:49 lr: 0.000048 grad: 0.0939 (0.1028) loss: 0.8162 (0.8143) time: 0.1447 data: 0.0627 max mem: 9377 +Train: [59] [4400/6250] eta: 0:04:34 lr: 0.000048 grad: 0.1036 (0.1028) loss: 0.8117 (0.8143) time: 0.1284 data: 0.0410 max mem: 9377 +Train: [59] [4500/6250] eta: 0:04:19 lr: 0.000048 grad: 0.0978 (0.1028) loss: 0.8183 (0.8144) time: 0.1317 data: 0.0481 max mem: 9377 +Train: [59] [4600/6250] eta: 0:04:04 lr: 0.000048 grad: 0.0986 (0.1028) loss: 0.8209 (0.8144) time: 0.1273 data: 0.0393 max mem: 9377 +Train: [59] [4700/6250] eta: 0:03:49 lr: 0.000048 grad: 0.0951 (0.1028) loss: 0.8192 (0.8144) time: 0.1373 data: 0.0574 max mem: 9377 +Train: [59] [4800/6250] eta: 0:03:33 lr: 0.000048 grad: 0.1025 (0.1027) loss: 0.8105 (0.8144) time: 0.1353 data: 0.0532 max mem: 9377 +Train: [59] [4900/6250] eta: 0:03:19 lr: 0.000048 grad: 0.0919 (0.1027) loss: 0.8205 (0.8145) time: 0.1958 data: 0.1185 max mem: 9377 +Train: [59] [5000/6250] eta: 0:03:04 lr: 0.000048 grad: 0.0995 (0.1027) loss: 0.8151 (0.8145) time: 0.1493 data: 0.0701 max mem: 9377 +Train: [59] [5100/6250] eta: 0:02:50 lr: 0.000048 grad: 0.0942 (0.1026) loss: 0.8206 (0.8146) time: 0.1632 data: 0.0744 max mem: 9377 +Train: [59] [5200/6250] eta: 0:02:35 lr: 0.000047 grad: 0.0971 (0.1026) loss: 0.8223 (0.8146) time: 0.1505 data: 0.0634 max mem: 9377 +Train: [59] [5300/6250] eta: 0:02:20 lr: 0.000047 grad: 0.0957 (0.1026) loss: 0.8228 (0.8147) time: 0.1460 data: 0.0583 max mem: 9377 +Train: [59] [5400/6250] eta: 0:02:05 lr: 0.000047 grad: 0.1018 (0.1027) loss: 0.8157 (0.8147) time: 0.1487 data: 0.0628 max mem: 9377 +Train: [59] [5500/6250] eta: 0:01:50 lr: 0.000047 grad: 0.0990 (0.1027) loss: 0.8157 (0.8147) time: 0.1464 data: 0.0620 max mem: 9377 +Train: [59] [5600/6250] eta: 0:01:35 lr: 0.000047 grad: 0.0969 (0.1027) loss: 0.8147 (0.8148) time: 0.1338 data: 0.0511 max mem: 9377 +Train: [59] [5700/6250] eta: 0:01:20 lr: 0.000047 grad: 0.1041 (0.1027) loss: 0.8107 (0.8148) time: 0.1227 data: 0.0394 max mem: 9377 +Train: [59] [5800/6250] eta: 0:01:06 lr: 0.000047 grad: 0.1017 (0.1028) loss: 0.8178 (0.8149) time: 0.1399 data: 0.0484 max mem: 9377 +Train: [59] [5900/6250] eta: 0:00:51 lr: 0.000047 grad: 0.0972 (0.1028) loss: 0.8201 (0.8149) time: 0.1428 data: 0.0552 max mem: 9377 +Train: [59] [6000/6250] eta: 0:00:36 lr: 0.000047 grad: 0.1024 (0.1028) loss: 0.8186 (0.8149) time: 0.1401 data: 0.0558 max mem: 9377 +Train: [59] [6100/6250] eta: 0:00:21 lr: 0.000047 grad: 0.1049 (0.1030) loss: 0.8110 (0.8149) time: 0.1390 data: 0.0556 max mem: 9377 +Train: [59] [6200/6250] eta: 0:00:07 lr: 0.000047 grad: 0.1028 (0.1031) loss: 0.8136 (0.8148) time: 0.1626 data: 0.0844 max mem: 9377 +Train: [59] [6249/6250] eta: 0:00:00 lr: 0.000047 grad: 0.1025 (0.1031) loss: 0.8163 (0.8148) time: 0.1454 data: 0.0656 max mem: 9377 +Train: [59] Total time: 0:15:16 (0.1467 s / it) +Averaged stats: lr: 0.000047 grad: 0.1025 (0.1031) loss: 0.8163 (0.8148) +Eval (hcp-train-subset): [59] [ 0/62] eta: 0:03:43 loss: 0.8315 (0.8315) time: 3.6049 data: 3.5180 max mem: 9377 +Eval (hcp-train-subset): [59] [61/62] eta: 0:00:00 loss: 0.8262 (0.8288) time: 0.1218 data: 0.0963 max mem: 9377 +Eval (hcp-train-subset): [59] Total time: 0:00:14 (0.2332 s / it) +Averaged stats (hcp-train-subset): loss: 0.8262 (0.8288) +Making plots (hcp-train-subset): example=10 +Eval (hcp-val): [59] [ 0/62] eta: 0:05:54 loss: 0.8309 (0.8309) time: 5.7130 data: 5.6828 max mem: 9377 +Eval (hcp-val): [59] [61/62] eta: 0:00:00 loss: 0.8332 (0.8342) time: 0.0907 data: 0.0655 max mem: 9377 +Eval (hcp-val): [59] Total time: 0:00:13 (0.2158 s / it) +Averaged stats (hcp-val): loss: 0.8332 (0.8342) +Making plots (hcp-val): example=9 +Eval (nsd-val): [59] [ 0/62] eta: 0:04:10 loss: 0.7949 (0.7949) time: 4.0383 data: 3.9684 max mem: 9377 +Eval (nsd-val): [59] [61/62] eta: 0:00:00 loss: 0.8093 (0.8089) time: 0.1001 data: 0.0751 max mem: 9377 +Eval (nsd-val): [59] Total time: 0:00:13 (0.2219 s / it) +Averaged stats (nsd-val): loss: 0.8093 (0.8089) +Making plots (nsd-val): example=42 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-00059.pth +Train: [60] [ 0/6250] eta: 9:34:33 lr: 0.000047 grad: 0.1508 (0.1508) loss: 0.7883 (0.7883) time: 5.5158 data: 5.3364 max mem: 9377 +Train: [60] [ 100/6250] eta: 0:21:19 lr: 0.000047 grad: 0.1067 (0.1210) loss: 0.8283 (0.8249) time: 0.1892 data: 0.0863 max mem: 9377 +Train: [60] [ 200/6250] eta: 0:18:11 lr: 0.000047 grad: 0.0923 (0.1158) loss: 0.8233 (0.8196) time: 0.1407 data: 0.0515 max mem: 9377 +Train: [60] [ 300/6250] eta: 0:16:51 lr: 0.000047 grad: 0.0876 (0.1095) loss: 0.8306 (0.8210) time: 0.1337 data: 0.0397 max mem: 9377 +Train: [60] [ 400/6250] eta: 0:15:59 lr: 0.000047 grad: 0.1031 (0.1088) loss: 0.8131 (0.8208) time: 0.1448 data: 0.0608 max mem: 9377 +Train: [60] [ 500/6250] eta: 0:15:27 lr: 0.000047 grad: 0.1051 (0.1088) loss: 0.8083 (0.8194) time: 0.1393 data: 0.0592 max mem: 9377 +Train: [60] [ 600/6250] eta: 0:15:00 lr: 0.000047 grad: 0.1018 (0.1078) loss: 0.8091 (0.8185) time: 0.1440 data: 0.0593 max mem: 9377 +Train: [60] [ 700/6250] eta: 0:14:31 lr: 0.000047 grad: 0.0953 (0.1066) loss: 0.8202 (0.8184) time: 0.1514 data: 0.0548 max mem: 9377 +Train: [60] [ 800/6250] eta: 0:14:09 lr: 0.000047 grad: 0.1020 (0.1059) loss: 0.8201 (0.8182) time: 0.1540 data: 0.0694 max mem: 9377 +Train: [60] [ 900/6250] eta: 0:13:49 lr: 0.000047 grad: 0.1006 (0.1054) loss: 0.8171 (0.8183) time: 0.1572 data: 0.0797 max mem: 9377 +Train: [60] [1000/6250] eta: 0:13:30 lr: 0.000047 grad: 0.0967 (0.1047) loss: 0.8210 (0.8183) time: 0.1368 data: 0.0545 max mem: 9377 +Train: [60] [1100/6250] eta: 0:13:11 lr: 0.000047 grad: 0.0891 (0.1039) loss: 0.8241 (0.8186) time: 0.1373 data: 0.0558 max mem: 9377 +Train: [60] [1200/6250] eta: 0:12:52 lr: 0.000047 grad: 0.0969 (0.1034) loss: 0.8170 (0.8187) time: 0.1558 data: 0.0767 max mem: 9377 +Train: [60] [1300/6250] eta: 0:12:40 lr: 0.000047 grad: 0.0960 (0.1029) loss: 0.8178 (0.8187) time: 0.1792 data: 0.1059 max mem: 9377 +Train: [60] [1400/6250] eta: 0:12:28 lr: 0.000047 grad: 0.1028 (0.1026) loss: 0.8169 (0.8187) time: 0.1454 data: 0.0643 max mem: 9377 +Train: [60] [1500/6250] eta: 0:12:17 lr: 0.000047 grad: 0.0984 (0.1025) loss: 0.8184 (0.8185) time: 0.1660 data: 0.0873 max mem: 9377 +Train: [60] [1600/6250] eta: 0:12:02 lr: 0.000047 grad: 0.0973 (0.1022) loss: 0.8110 (0.8185) time: 0.1475 data: 0.0698 max mem: 9377 +Train: [60] [1700/6250] eta: 0:11:53 lr: 0.000047 grad: 0.0936 (0.1021) loss: 0.8147 (0.8185) time: 0.1902 data: 0.1140 max mem: 9377 +Train: [60] [1800/6250] eta: 0:11:40 lr: 0.000047 grad: 0.0942 (0.1020) loss: 0.8203 (0.8185) time: 0.1935 data: 0.1145 max mem: 9377 +Train: [60] [1900/6250] eta: 0:11:24 lr: 0.000047 grad: 0.0994 (0.1018) loss: 0.8148 (0.8183) time: 0.1639 data: 0.0806 max mem: 9377 +Train: [60] [2000/6250] eta: 0:11:08 lr: 0.000047 grad: 0.0994 (0.1018) loss: 0.8227 (0.8182) time: 0.1414 data: 0.0645 max mem: 9377 +Train: [60] [2100/6250] eta: 0:10:54 lr: 0.000046 grad: 0.0975 (0.1018) loss: 0.8143 (0.8181) time: 0.1602 data: 0.0788 max mem: 9377 +Train: [60] [2200/6250] eta: 0:10:36 lr: 0.000046 grad: 0.1014 (0.1019) loss: 0.8154 (0.8180) time: 0.1364 data: 0.0542 max mem: 9377 +Train: [60] [2300/6250] eta: 0:10:16 lr: 0.000046 grad: 0.1012 (0.1020) loss: 0.8100 (0.8179) time: 0.1388 data: 0.0514 max mem: 9377 +Train: [60] [2400/6250] eta: 0:09:57 lr: 0.000046 grad: 0.1090 (0.1022) loss: 0.8133 (0.8178) time: 0.1490 data: 0.0646 max mem: 9377 +Train: [60] [2500/6250] eta: 0:09:40 lr: 0.000046 grad: 0.1066 (0.1024) loss: 0.8137 (0.8176) time: 0.1377 data: 0.0569 max mem: 9377 +Train: [60] [2600/6250] eta: 0:09:24 lr: 0.000046 grad: 0.1080 (0.1025) loss: 0.8140 (0.8175) time: 0.1344 data: 0.0542 max mem: 9377 +Train: [60] [2700/6250] eta: 0:09:07 lr: 0.000046 grad: 0.0939 (0.1026) loss: 0.8165 (0.8174) time: 0.1434 data: 0.0600 max mem: 9377 +Train: [60] [2800/6250] eta: 0:08:51 lr: 0.000046 grad: 0.1014 (0.1027) loss: 0.8111 (0.8173) time: 0.1488 data: 0.0656 max mem: 9377 +Train: [60] [2900/6250] eta: 0:08:35 lr: 0.000046 grad: 0.1047 (0.1029) loss: 0.8154 (0.8172) time: 0.1610 data: 0.0853 max mem: 9377 +Train: [60] [3000/6250] eta: 0:08:19 lr: 0.000046 grad: 0.1061 (0.1031) loss: 0.8110 (0.8171) time: 0.1454 data: 0.0643 max mem: 9377 +Train: [60] [3100/6250] eta: 0:08:03 lr: 0.000046 grad: 0.1013 (0.1033) loss: 0.8147 (0.8169) time: 0.1621 data: 0.0835 max mem: 9377 +Train: [60] [3200/6250] eta: 0:07:47 lr: 0.000046 grad: 0.1025 (0.1034) loss: 0.8144 (0.8168) time: 0.1129 data: 0.0326 max mem: 9377 +Train: [60] [3300/6250] eta: 0:07:30 lr: 0.000046 grad: 0.1039 (0.1036) loss: 0.8133 (0.8166) time: 0.1283 data: 0.0429 max mem: 9377 +Train: [60] [3400/6250] eta: 0:07:14 lr: 0.000046 grad: 0.1058 (0.1038) loss: 0.8031 (0.8164) time: 0.1322 data: 0.0476 max mem: 9377 +Train: [60] [3500/6250] eta: 0:06:57 lr: 0.000046 grad: 0.1061 (0.1039) loss: 0.8125 (0.8162) time: 0.1293 data: 0.0459 max mem: 9377 +Train: [60] [3600/6250] eta: 0:06:41 lr: 0.000046 grad: 0.1011 (0.1040) loss: 0.8136 (0.8160) time: 0.1330 data: 0.0506 max mem: 9377 +Train: [60] [3700/6250] eta: 0:06:25 lr: 0.000046 grad: 0.1047 (0.1040) loss: 0.8087 (0.8159) time: 0.1464 data: 0.0642 max mem: 9377 +Train: [60] [3800/6250] eta: 0:06:09 lr: 0.000046 grad: 0.1051 (0.1040) loss: 0.8076 (0.8157) time: 0.1417 data: 0.0569 max mem: 9377 +Train: [60] [3900/6250] eta: 0:05:54 lr: 0.000046 grad: 0.1050 (0.1041) loss: 0.8058 (0.8155) time: 0.1347 data: 0.0543 max mem: 9377 +Train: [60] [4000/6250] eta: 0:05:38 lr: 0.000046 grad: 0.1000 (0.1041) loss: 0.8144 (0.8153) time: 0.1392 data: 0.0563 max mem: 9377 +Train: [60] [4100/6250] eta: 0:05:22 lr: 0.000046 grad: 0.1093 (0.1042) loss: 0.8080 (0.8152) time: 0.1256 data: 0.0366 max mem: 9377 +Train: [60] [4200/6250] eta: 0:05:07 lr: 0.000046 grad: 0.1040 (0.1042) loss: 0.8076 (0.8150) time: 0.1350 data: 0.0488 max mem: 9377 +Train: [60] [4300/6250] eta: 0:04:51 lr: 0.000046 grad: 0.1034 (0.1043) loss: 0.8098 (0.8149) time: 0.1425 data: 0.0552 max mem: 9377 +Train: [60] [4400/6250] eta: 0:04:36 lr: 0.000046 grad: 0.1072 (0.1044) loss: 0.8059 (0.8147) time: 0.1296 data: 0.0502 max mem: 9377 +Train: [60] [4500/6250] eta: 0:04:21 lr: 0.000046 grad: 0.1001 (0.1044) loss: 0.8205 (0.8147) time: 0.1417 data: 0.0594 max mem: 9377 +Train: [60] [4600/6250] eta: 0:04:06 lr: 0.000046 grad: 0.1043 (0.1044) loss: 0.8120 (0.8146) time: 0.1586 data: 0.0721 max mem: 9377 +Train: [60] [4700/6250] eta: 0:03:51 lr: 0.000046 grad: 0.1056 (0.1046) loss: 0.8083 (0.8145) time: 0.2012 data: 0.1243 max mem: 9377 +Train: [60] [4800/6250] eta: 0:03:37 lr: 0.000046 grad: 0.1074 (0.1046) loss: 0.8091 (0.8145) time: 0.1503 data: 0.0672 max mem: 9377 +Train: [60] [4900/6250] eta: 0:03:22 lr: 0.000046 grad: 0.1091 (0.1046) loss: 0.8126 (0.8144) time: 0.1632 data: 0.0762 max mem: 9377 +Train: [60] [5000/6250] eta: 0:03:07 lr: 0.000046 grad: 0.1052 (0.1047) loss: 0.8096 (0.8143) time: 0.1523 data: 0.0658 max mem: 9377 +Train: [60] [5100/6250] eta: 0:02:52 lr: 0.000046 grad: 0.1012 (0.1047) loss: 0.8122 (0.8143) time: 0.1475 data: 0.0587 max mem: 9377 +Train: [60] [5200/6250] eta: 0:02:37 lr: 0.000045 grad: 0.1077 (0.1048) loss: 0.8163 (0.8143) time: 0.1392 data: 0.0519 max mem: 9377 +Train: [60] [5300/6250] eta: 0:02:22 lr: 0.000045 grad: 0.1086 (0.1048) loss: 0.8146 (0.8143) time: 0.1517 data: 0.0594 max mem: 9377 +Train: [60] [5400/6250] eta: 0:02:07 lr: 0.000045 grad: 0.1017 (0.1048) loss: 0.8117 (0.8142) time: 0.1730 data: 0.0849 max mem: 9377 +Train: [60] [5500/6250] eta: 0:01:52 lr: 0.000045 grad: 0.0983 (0.1048) loss: 0.8175 (0.8142) time: 0.1391 data: 0.0494 max mem: 9377 +Train: [60] [5600/6250] eta: 0:01:37 lr: 0.000045 grad: 0.1071 (0.1048) loss: 0.8153 (0.8143) time: 0.1415 data: 0.0561 max mem: 9377 +Train: [60] [5700/6250] eta: 0:01:22 lr: 0.000045 grad: 0.1006 (0.1048) loss: 0.8104 (0.8143) time: 0.1462 data: 0.0624 max mem: 9377 +Train: [60] [5800/6250] eta: 0:01:07 lr: 0.000045 grad: 0.1074 (0.1049) loss: 0.8136 (0.8143) time: 0.1806 data: 0.1005 max mem: 9377 +Train: [60] [5900/6250] eta: 0:00:52 lr: 0.000045 grad: 0.1053 (0.1049) loss: 0.8112 (0.8143) time: 0.1539 data: 0.0796 max mem: 9377 +Train: [60] [6000/6250] eta: 0:00:37 lr: 0.000045 grad: 0.1051 (0.1050) loss: 0.8133 (0.8142) time: 0.1257 data: 0.0470 max mem: 9377 +Train: [60] [6100/6250] eta: 0:00:22 lr: 0.000045 grad: 0.1031 (0.1050) loss: 0.8087 (0.8142) time: 0.1607 data: 0.0808 max mem: 9377 +Train: [60] [6200/6250] eta: 0:00:07 lr: 0.000045 grad: 0.1040 (0.1050) loss: 0.8142 (0.8142) time: 0.1318 data: 0.0486 max mem: 9377 +Train: [60] [6249/6250] eta: 0:00:00 lr: 0.000045 grad: 0.1018 (0.1051) loss: 0.8141 (0.8142) time: 0.1330 data: 0.0437 max mem: 9377 +Train: [60] Total time: 0:15:47 (0.1516 s / it) +Averaged stats: lr: 0.000045 grad: 0.1018 (0.1051) loss: 0.8141 (0.8142) +Eval (hcp-train-subset): [60] [ 0/62] eta: 0:06:57 loss: 0.8331 (0.8331) time: 6.7328 data: 6.7009 max mem: 9377 +Eval (hcp-train-subset): [60] [61/62] eta: 0:00:00 loss: 0.8236 (0.8265) time: 0.1291 data: 0.1037 max mem: 9377 +Eval (hcp-train-subset): [60] Total time: 0:00:14 (0.2403 s / it) +Averaged stats (hcp-train-subset): loss: 0.8236 (0.8265) +Eval (hcp-val): [60] [ 0/62] eta: 0:06:05 loss: 0.8284 (0.8284) time: 5.8992 data: 5.8681 max mem: 9377 +Eval (hcp-val): [60] [61/62] eta: 0:00:00 loss: 0.8317 (0.8339) time: 0.1501 data: 0.1247 max mem: 9377 +Eval (hcp-val): [60] Total time: 0:00:15 (0.2438 s / it) +Averaged stats (hcp-val): loss: 0.8317 (0.8339) +Eval (nsd-val): [60] [ 0/62] eta: 0:05:01 loss: 0.8049 (0.8049) time: 4.8570 data: 4.7873 max mem: 9377 +Eval (nsd-val): [60] [61/62] eta: 0:00:00 loss: 0.8112 (0.8130) time: 0.1387 data: 0.1118 max mem: 9377 +Eval (nsd-val): [60] Total time: 0:00:16 (0.2608 s / it) +Averaged stats (nsd-val): loss: 0.8112 (0.8130) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +Train: [61] [ 0/6250] eta: 10:52:53 lr: 0.000045 grad: 0.1485 (0.1485) loss: 0.8122 (0.8122) time: 6.2678 data: 6.1336 max mem: 9377 +Train: [61] [ 100/6250] eta: 0:24:36 lr: 0.000045 grad: 0.1081 (0.1214) loss: 0.8266 (0.8299) time: 0.1659 data: 0.0643 max mem: 9377 +Train: [61] [ 200/6250] eta: 0:20:37 lr: 0.000045 grad: 0.1052 (0.1133) loss: 0.8175 (0.8263) time: 0.1761 data: 0.0726 max mem: 9377 +Train: [61] [ 300/6250] eta: 0:18:59 lr: 0.000045 grad: 0.0978 (0.1112) loss: 0.8180 (0.8229) time: 0.1662 data: 0.0651 max mem: 9377 +Train: [61] [ 400/6250] eta: 0:18:10 lr: 0.000045 grad: 0.0963 (0.1095) loss: 0.8173 (0.8207) time: 0.1778 data: 0.0795 max mem: 9377 +Train: [61] [ 500/6250] eta: 0:17:31 lr: 0.000045 grad: 0.1109 (0.1089) loss: 0.8127 (0.8188) time: 0.1899 data: 0.0877 max mem: 9377 +Train: [61] [ 600/6250] eta: 0:16:54 lr: 0.000045 grad: 0.0993 (0.1083) loss: 0.8103 (0.8176) time: 0.1852 data: 0.0839 max mem: 9377 +Train: [61] [ 700/6250] eta: 0:16:17 lr: 0.000045 grad: 0.1029 (0.1073) loss: 0.8127 (0.8169) time: 0.1669 data: 0.0760 max mem: 9377 +Train: [61] [ 800/6250] eta: 0:15:41 lr: 0.000045 grad: 0.0951 (0.1068) loss: 0.8099 (0.8167) time: 0.1637 data: 0.0717 max mem: 9377 +Train: [61] [ 900/6250] eta: 0:15:16 lr: 0.000045 grad: 0.1029 (0.1064) loss: 0.8091 (0.8162) time: 0.1623 data: 0.0677 max mem: 9377 +Train: [61] [1000/6250] eta: 0:14:47 lr: 0.000045 grad: 0.0965 (0.1059) loss: 0.8064 (0.8159) time: 0.1664 data: 0.0855 max mem: 9377 +Train: [61] [1100/6250] eta: 0:14:20 lr: 0.000045 grad: 0.1017 (0.1054) loss: 0.8097 (0.8155) time: 0.1439 data: 0.0585 max mem: 9377 +Train: [61] [1200/6250] eta: 0:13:57 lr: 0.000045 grad: 0.0960 (0.1051) loss: 0.8168 (0.8153) time: 0.1723 data: 0.0940 max mem: 9377 +Train: [61] [1300/6250] eta: 0:13:31 lr: 0.000045 grad: 0.1010 (0.1052) loss: 0.8144 (0.8149) time: 0.1286 data: 0.0434 max mem: 9377 +Train: [61] [1400/6250] eta: 0:13:07 lr: 0.000045 grad: 0.1036 (0.1052) loss: 0.8160 (0.8144) time: 0.1291 data: 0.0467 max mem: 9377 +Train: [61] [1500/6250] eta: 0:12:44 lr: 0.000045 grad: 0.0980 (0.1050) loss: 0.8099 (0.8141) time: 0.1473 data: 0.0622 max mem: 9377 +Train: [61] [1600/6250] eta: 0:12:24 lr: 0.000045 grad: 0.0942 (0.1048) loss: 0.8161 (0.8140) time: 0.1557 data: 0.0766 max mem: 9377 +Train: [61] [1700/6250] eta: 0:12:03 lr: 0.000045 grad: 0.1020 (0.1047) loss: 0.8106 (0.8138) time: 0.1324 data: 0.0494 max mem: 9377 +Train: [61] [1800/6250] eta: 0:11:43 lr: 0.000045 grad: 0.0937 (0.1046) loss: 0.8103 (0.8137) time: 0.1364 data: 0.0509 max mem: 9377 +Train: [61] [1900/6250] eta: 0:11:24 lr: 0.000045 grad: 0.0983 (0.1047) loss: 0.8073 (0.8136) time: 0.1693 data: 0.0875 max mem: 9377 +Train: [61] [2000/6250] eta: 0:11:03 lr: 0.000045 grad: 0.0968 (0.1048) loss: 0.8166 (0.8135) time: 0.1313 data: 0.0469 max mem: 9377 +Train: [61] [2100/6250] eta: 0:10:45 lr: 0.000044 grad: 0.1031 (0.1048) loss: 0.8080 (0.8135) time: 0.1478 data: 0.0726 max mem: 9377 +Train: [61] [2200/6250] eta: 0:10:26 lr: 0.000044 grad: 0.1006 (0.1048) loss: 0.8096 (0.8135) time: 0.1361 data: 0.0571 max mem: 9377 +Train: [61] [2300/6250] eta: 0:10:08 lr: 0.000044 grad: 0.1123 (0.1049) loss: 0.8120 (0.8134) time: 0.1447 data: 0.0479 max mem: 9377 +Train: [61] [2400/6250] eta: 0:09:51 lr: 0.000044 grad: 0.1033 (0.1051) loss: 0.8165 (0.8134) time: 0.1385 data: 0.0554 max mem: 9377 +Train: [61] [2500/6250] eta: 0:09:34 lr: 0.000044 grad: 0.1088 (0.1052) loss: 0.8162 (0.8135) time: 0.1273 data: 0.0461 max mem: 9377 +Train: [61] [2600/6250] eta: 0:09:17 lr: 0.000044 grad: 0.1115 (0.1054) loss: 0.8145 (0.8135) time: 0.1400 data: 0.0590 max mem: 9377 +Train: [61] [2700/6250] eta: 0:09:00 lr: 0.000044 grad: 0.1057 (0.1054) loss: 0.8216 (0.8135) time: 0.1241 data: 0.0422 max mem: 9377 +Train: [61] [2800/6250] eta: 0:08:44 lr: 0.000044 grad: 0.1059 (0.1055) loss: 0.8101 (0.8135) time: 0.1473 data: 0.0615 max mem: 9377 +Train: [61] [2900/6250] eta: 0:08:28 lr: 0.000044 grad: 0.0997 (0.1055) loss: 0.8144 (0.8135) time: 0.1428 data: 0.0640 max mem: 9377 +Train: [61] [3000/6250] eta: 0:08:12 lr: 0.000044 grad: 0.1052 (0.1057) loss: 0.8204 (0.8136) time: 0.1471 data: 0.0675 max mem: 9377 +Train: [61] [3100/6250] eta: 0:07:56 lr: 0.000044 grad: 0.1000 (0.1057) loss: 0.8156 (0.8136) time: 0.1401 data: 0.0547 max mem: 9377 +Train: [61] [3200/6250] eta: 0:07:40 lr: 0.000044 grad: 0.1091 (0.1059) loss: 0.8159 (0.8136) time: 0.1512 data: 0.0763 max mem: 9377 +Train: [61] [3300/6250] eta: 0:07:25 lr: 0.000044 grad: 0.0989 (0.1059) loss: 0.8122 (0.8136) time: 0.1584 data: 0.0776 max mem: 9377 +Train: [61] [3400/6250] eta: 0:07:09 lr: 0.000044 grad: 0.1009 (0.1059) loss: 0.8177 (0.8136) time: 0.1471 data: 0.0651 max mem: 9377 +Train: [61] [3500/6250] eta: 0:06:53 lr: 0.000044 grad: 0.1082 (0.1060) loss: 0.8149 (0.8136) time: 0.1323 data: 0.0466 max mem: 9377 +Train: [61] [3600/6250] eta: 0:06:38 lr: 0.000044 grad: 0.1008 (0.1060) loss: 0.8165 (0.8137) time: 0.1395 data: 0.0592 max mem: 9377 +Train: [61] [3700/6250] eta: 0:06:23 lr: 0.000044 grad: 0.1045 (0.1060) loss: 0.8118 (0.8136) time: 0.1680 data: 0.0867 max mem: 9377 +Train: [61] [3800/6250] eta: 0:06:08 lr: 0.000044 grad: 0.1054 (0.1061) loss: 0.8133 (0.8136) time: 0.1382 data: 0.0587 max mem: 9377 +Train: [61] [3900/6250] eta: 0:05:53 lr: 0.000044 grad: 0.1094 (0.1062) loss: 0.8129 (0.8135) time: 0.1515 data: 0.0708 max mem: 9377 +Train: [61] [4000/6250] eta: 0:05:38 lr: 0.000044 grad: 0.1033 (0.1061) loss: 0.8150 (0.8135) time: 0.1572 data: 0.0770 max mem: 9377 +Train: [61] [4100/6250] eta: 0:05:23 lr: 0.000044 grad: 0.1072 (0.1062) loss: 0.8067 (0.8135) time: 0.1283 data: 0.0497 max mem: 9377 +Train: [61] [4200/6250] eta: 0:05:07 lr: 0.000044 grad: 0.1045 (0.1063) loss: 0.8087 (0.8134) time: 0.1435 data: 0.0597 max mem: 9377 +Train: [61] [4300/6250] eta: 0:04:53 lr: 0.000044 grad: 0.1148 (0.1064) loss: 0.8053 (0.8134) time: 0.1401 data: 0.0594 max mem: 9377 +Train: [61] [4400/6250] eta: 0:04:38 lr: 0.000044 grad: 0.1056 (0.1064) loss: 0.8099 (0.8133) time: 0.1453 data: 0.0610 max mem: 9377 +Train: [61] [4500/6250] eta: 0:04:24 lr: 0.000044 grad: 0.1084 (0.1065) loss: 0.8106 (0.8132) time: 0.1681 data: 0.0881 max mem: 9377 +Train: [61] [4600/6250] eta: 0:04:09 lr: 0.000044 grad: 0.1122 (0.1067) loss: 0.8092 (0.8131) time: 0.1238 data: 0.0448 max mem: 9377 +Train: [61] [4700/6250] eta: 0:03:54 lr: 0.000044 grad: 0.1007 (0.1067) loss: 0.8143 (0.8131) time: 0.1413 data: 0.0613 max mem: 9377 +Train: [61] [4800/6250] eta: 0:03:39 lr: 0.000044 grad: 0.1079 (0.1068) loss: 0.8166 (0.8131) time: 0.1616 data: 0.0834 max mem: 9377 +Train: [61] [4900/6250] eta: 0:03:24 lr: 0.000044 grad: 0.1068 (0.1069) loss: 0.8157 (0.8130) time: 0.1457 data: 0.0649 max mem: 9377 +Train: [61] [5000/6250] eta: 0:03:09 lr: 0.000044 grad: 0.1157 (0.1070) loss: 0.8093 (0.8130) time: 0.1617 data: 0.0770 max mem: 9377 +Train: [61] [5100/6250] eta: 0:02:54 lr: 0.000044 grad: 0.1065 (0.1071) loss: 0.8149 (0.8130) time: 0.1632 data: 0.0796 max mem: 9377 +Train: [61] [5200/6250] eta: 0:02:38 lr: 0.000044 grad: 0.1018 (0.1071) loss: 0.8040 (0.8129) time: 0.1271 data: 0.0378 max mem: 9377 +Train: [61] [5300/6250] eta: 0:02:23 lr: 0.000043 grad: 0.1090 (0.1072) loss: 0.8132 (0.8129) time: 0.1173 data: 0.0360 max mem: 9377 +Train: [61] [5400/6250] eta: 0:02:07 lr: 0.000043 grad: 0.1080 (0.1072) loss: 0.8131 (0.8128) time: 0.1258 data: 0.0398 max mem: 9377 +Train: [61] [5500/6250] eta: 0:01:52 lr: 0.000043 grad: 0.1050 (0.1073) loss: 0.8092 (0.8128) time: 0.1248 data: 0.0365 max mem: 9377 +Train: [61] [5600/6250] eta: 0:01:37 lr: 0.000043 grad: 0.1034 (0.1073) loss: 0.8125 (0.8128) time: 0.1108 data: 0.0284 max mem: 9377 +Train: [61] [5700/6250] eta: 0:01:22 lr: 0.000043 grad: 0.1039 (0.1073) loss: 0.8174 (0.8129) time: 0.1478 data: 0.0689 max mem: 9377 +Train: [61] [5800/6250] eta: 0:01:07 lr: 0.000043 grad: 0.1021 (0.1073) loss: 0.8146 (0.8129) time: 0.1510 data: 0.0654 max mem: 9377 +Train: [61] [5900/6250] eta: 0:00:52 lr: 0.000043 grad: 0.1001 (0.1072) loss: 0.8165 (0.8130) time: 0.1395 data: 0.0524 max mem: 9377 +Train: [61] [6000/6250] eta: 0:00:37 lr: 0.000043 grad: 0.1020 (0.1073) loss: 0.8119 (0.8130) time: 0.1362 data: 0.0556 max mem: 9377 +Train: [61] [6100/6250] eta: 0:00:22 lr: 0.000043 grad: 0.1076 (0.1073) loss: 0.8189 (0.8130) time: 0.1387 data: 0.0534 max mem: 9377 +Train: [61] [6200/6250] eta: 0:00:07 lr: 0.000043 grad: 0.1066 (0.1073) loss: 0.8073 (0.8129) time: 0.1520 data: 0.0698 max mem: 9377 +Train: [61] [6249/6250] eta: 0:00:00 lr: 0.000043 grad: 0.1023 (0.1073) loss: 0.8104 (0.8129) time: 0.1512 data: 0.0720 max mem: 9377 +Train: [61] Total time: 0:15:32 (0.1492 s / it) +Averaged stats: lr: 0.000043 grad: 0.1023 (0.1073) loss: 0.8104 (0.8129) +Eval (hcp-train-subset): [61] [ 0/62] eta: 0:05:37 loss: 0.8296 (0.8296) time: 5.4416 data: 5.4081 max mem: 9377 +Eval (hcp-train-subset): [61] [61/62] eta: 0:00:00 loss: 0.8261 (0.8275) time: 0.1400 data: 0.1145 max mem: 9377 +Eval (hcp-train-subset): [61] Total time: 0:00:16 (0.2594 s / it) +Averaged stats (hcp-train-subset): loss: 0.8261 (0.8275) +Eval (hcp-val): [61] [ 0/62] eta: 0:03:34 loss: 0.8304 (0.8304) time: 3.4557 data: 3.3688 max mem: 9377 +Eval (hcp-val): [61] [61/62] eta: 0:00:00 loss: 0.8320 (0.8335) time: 0.1451 data: 0.1195 max mem: 9377 +Eval (hcp-val): [61] Total time: 0:00:13 (0.2238 s / it) +Averaged stats (hcp-val): loss: 0.8320 (0.8335) +Eval (nsd-val): [61] [ 0/62] eta: 0:03:53 loss: 0.7996 (0.7996) time: 3.7644 data: 3.6877 max mem: 9377 +Eval (nsd-val): [61] [61/62] eta: 0:00:00 loss: 0.8126 (0.8110) time: 0.1432 data: 0.1180 max mem: 9377 +Eval (nsd-val): [61] Total time: 0:00:14 (0.2297 s / it) +Averaged stats (nsd-val): loss: 0.8126 (0.8110) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +Train: [62] [ 0/6250] eta: 7:41:24 lr: 0.000043 grad: 0.0720 (0.0720) loss: 0.8707 (0.8707) time: 4.4295 data: 4.1786 max mem: 9377 +Train: [62] [ 100/6250] eta: 0:20:16 lr: 0.000043 grad: 0.0989 (0.1152) loss: 0.8151 (0.8293) time: 0.1482 data: 0.0481 max mem: 9377 +Train: [62] [ 200/6250] eta: 0:17:41 lr: 0.000043 grad: 0.1087 (0.1188) loss: 0.8146 (0.8235) time: 0.1640 data: 0.0738 max mem: 9377 +Train: [62] [ 300/6250] eta: 0:16:21 lr: 0.000043 grad: 0.0982 (0.1170) loss: 0.8223 (0.8206) time: 0.1390 data: 0.0418 max mem: 9377 +Train: [62] [ 400/6250] eta: 0:15:39 lr: 0.000043 grad: 0.1118 (0.1169) loss: 0.8143 (0.8192) time: 0.1486 data: 0.0592 max mem: 9377 +Train: [62] [ 500/6250] eta: 0:15:04 lr: 0.000043 grad: 0.1056 (0.1159) loss: 0.8110 (0.8184) time: 0.1530 data: 0.0555 max mem: 9377 +Train: [62] [ 600/6250] eta: 0:14:38 lr: 0.000043 grad: 0.1042 (0.1153) loss: 0.8203 (0.8177) time: 0.1679 data: 0.0726 max mem: 9377 +Train: [62] [ 700/6250] eta: 0:14:22 lr: 0.000043 grad: 0.1035 (0.1145) loss: 0.8176 (0.8173) time: 0.1570 data: 0.0658 max mem: 9377 +Train: [62] [ 800/6250] eta: 0:13:55 lr: 0.000043 grad: 0.1013 (0.1137) loss: 0.8152 (0.8171) time: 0.1483 data: 0.0471 max mem: 9377 +Train: [62] [ 900/6250] eta: 0:13:31 lr: 0.000043 grad: 0.1014 (0.1132) loss: 0.8119 (0.8165) time: 0.1391 data: 0.0509 max mem: 9377 +Train: [62] [1000/6250] eta: 0:13:10 lr: 0.000043 grad: 0.0985 (0.1121) loss: 0.8241 (0.8166) time: 0.1360 data: 0.0518 max mem: 9377 +Train: [62] [1100/6250] eta: 0:12:50 lr: 0.000043 grad: 0.1047 (0.1114) loss: 0.8227 (0.8165) time: 0.1431 data: 0.0597 max mem: 9377 +Train: [62] [1200/6250] eta: 0:12:30 lr: 0.000043 grad: 0.1027 (0.1109) loss: 0.8099 (0.8164) time: 0.1383 data: 0.0539 max mem: 9377 +Train: [62] [1300/6250] eta: 0:12:12 lr: 0.000043 grad: 0.0994 (0.1107) loss: 0.8140 (0.8162) time: 0.1327 data: 0.0514 max mem: 9377 +Train: [62] [1400/6250] eta: 0:11:54 lr: 0.000043 grad: 0.1043 (0.1103) loss: 0.8185 (0.8160) time: 0.1461 data: 0.0615 max mem: 9377 +Train: [62] [1500/6250] eta: 0:11:36 lr: 0.000043 grad: 0.1021 (0.1100) loss: 0.8104 (0.8159) time: 0.1110 data: 0.0268 max mem: 9377 +Train: [62] [1600/6250] eta: 0:11:21 lr: 0.000043 grad: 0.1006 (0.1097) loss: 0.8147 (0.8158) time: 0.1069 data: 0.0234 max mem: 9377 +Train: [62] [1700/6250] eta: 0:11:04 lr: 0.000043 grad: 0.1045 (0.1096) loss: 0.8131 (0.8156) time: 0.1325 data: 0.0466 max mem: 9377 +Train: [62] [1800/6250] eta: 0:10:48 lr: 0.000043 grad: 0.0984 (0.1096) loss: 0.8155 (0.8154) time: 0.1255 data: 0.0406 max mem: 9377 +Train: [62] [1900/6250] eta: 0:10:32 lr: 0.000043 grad: 0.1063 (0.1095) loss: 0.8105 (0.8153) time: 0.1284 data: 0.0445 max mem: 9377 +Train: [62] [2000/6250] eta: 0:10:18 lr: 0.000043 grad: 0.1112 (0.1094) loss: 0.8113 (0.8151) time: 0.1504 data: 0.0585 max mem: 9377 +Train: [62] [2100/6250] eta: 0:10:03 lr: 0.000043 grad: 0.1062 (0.1093) loss: 0.8162 (0.8150) time: 0.1353 data: 0.0489 max mem: 9377 +Train: [62] [2200/6250] eta: 0:09:48 lr: 0.000042 grad: 0.1117 (0.1093) loss: 0.8092 (0.8148) time: 0.1561 data: 0.0795 max mem: 9377 +Train: [62] [2300/6250] eta: 0:09:32 lr: 0.000042 grad: 0.1084 (0.1092) loss: 0.8149 (0.8146) time: 0.1418 data: 0.0573 max mem: 9377 +Train: [62] [2400/6250] eta: 0:09:17 lr: 0.000042 grad: 0.1038 (0.1092) loss: 0.8138 (0.8144) time: 0.1264 data: 0.0414 max mem: 9377 +Train: [62] [2500/6250] eta: 0:09:02 lr: 0.000042 grad: 0.1065 (0.1092) loss: 0.8112 (0.8143) time: 0.1407 data: 0.0595 max mem: 9377 +Train: [62] [2600/6250] eta: 0:08:47 lr: 0.000042 grad: 0.1010 (0.1092) loss: 0.8124 (0.8142) time: 0.1410 data: 0.0559 max mem: 9377 +Train: [62] [2700/6250] eta: 0:08:32 lr: 0.000042 grad: 0.1019 (0.1091) loss: 0.8130 (0.8141) time: 0.1435 data: 0.0586 max mem: 9377 +Train: [62] [2800/6250] eta: 0:08:17 lr: 0.000042 grad: 0.1112 (0.1092) loss: 0.8123 (0.8140) time: 0.1376 data: 0.0569 max mem: 9377 +Train: [62] [2900/6250] eta: 0:08:02 lr: 0.000042 grad: 0.1042 (0.1092) loss: 0.8115 (0.8140) time: 0.1282 data: 0.0428 max mem: 9377 +Train: [62] [3000/6250] eta: 0:07:48 lr: 0.000042 grad: 0.1070 (0.1093) loss: 0.8089 (0.8139) time: 0.1433 data: 0.0554 max mem: 9377 +Train: [62] [3100/6250] eta: 0:07:34 lr: 0.000042 grad: 0.1045 (0.1092) loss: 0.8153 (0.8138) time: 0.1433 data: 0.0608 max mem: 9377 +Train: [62] [3200/6250] eta: 0:07:21 lr: 0.000042 grad: 0.1115 (0.1093) loss: 0.8093 (0.8137) time: 0.1400 data: 0.0562 max mem: 9377 +Train: [62] [3300/6250] eta: 0:07:07 lr: 0.000042 grad: 0.1005 (0.1092) loss: 0.8097 (0.8136) time: 0.1348 data: 0.0485 max mem: 9377 +Train: [62] [3400/6250] eta: 0:06:53 lr: 0.000042 grad: 0.1137 (0.1092) loss: 0.8061 (0.8135) time: 0.1520 data: 0.0706 max mem: 9377 +Train: [62] [3500/6250] eta: 0:06:39 lr: 0.000042 grad: 0.1073 (0.1092) loss: 0.8174 (0.8134) time: 0.1533 data: 0.0688 max mem: 9377 +Train: [62] [3600/6250] eta: 0:06:25 lr: 0.000042 grad: 0.1039 (0.1093) loss: 0.8108 (0.8133) time: 0.1526 data: 0.0689 max mem: 9377 +Train: [62] [3700/6250] eta: 0:06:10 lr: 0.000042 grad: 0.1011 (0.1092) loss: 0.8115 (0.8133) time: 0.1557 data: 0.0738 max mem: 9377 +Train: [62] [3800/6250] eta: 0:05:55 lr: 0.000042 grad: 0.1052 (0.1092) loss: 0.8138 (0.8133) time: 0.1396 data: 0.0610 max mem: 9377 +Train: [62] [3900/6250] eta: 0:05:41 lr: 0.000042 grad: 0.1051 (0.1092) loss: 0.8124 (0.8132) time: 0.1352 data: 0.0548 max mem: 9377 +Train: [62] [4000/6250] eta: 0:05:26 lr: 0.000042 grad: 0.1046 (0.1092) loss: 0.8113 (0.8131) time: 0.1455 data: 0.0564 max mem: 9377 +Train: [62] [4100/6250] eta: 0:05:12 lr: 0.000042 grad: 0.1046 (0.1092) loss: 0.8101 (0.8130) time: 0.1949 data: 0.1185 max mem: 9377 +Train: [62] [4200/6250] eta: 0:04:59 lr: 0.000042 grad: 0.1050 (0.1092) loss: 0.8105 (0.8130) time: 0.1493 data: 0.0616 max mem: 9377 +Train: [62] [4300/6250] eta: 0:04:45 lr: 0.000042 grad: 0.1043 (0.1091) loss: 0.8115 (0.8130) time: 0.1571 data: 0.0748 max mem: 9377 +Train: [62] [4400/6250] eta: 0:04:31 lr: 0.000042 grad: 0.1013 (0.1090) loss: 0.8139 (0.8130) time: 0.1429 data: 0.0616 max mem: 9377 +Train: [62] [4500/6250] eta: 0:04:16 lr: 0.000042 grad: 0.1070 (0.1090) loss: 0.8138 (0.8130) time: 0.1604 data: 0.0749 max mem: 9377 +Train: [62] [4600/6250] eta: 0:04:02 lr: 0.000042 grad: 0.1037 (0.1090) loss: 0.8173 (0.8130) time: 0.1524 data: 0.0643 max mem: 9377 +Train: [62] [4700/6250] eta: 0:03:48 lr: 0.000042 grad: 0.1047 (0.1089) loss: 0.8181 (0.8130) time: 0.1564 data: 0.0624 max mem: 9377 +Train: [62] [4800/6250] eta: 0:03:33 lr: 0.000042 grad: 0.1066 (0.1089) loss: 0.8061 (0.8131) time: 0.1530 data: 0.0509 max mem: 9377 +Train: [62] [4900/6250] eta: 0:03:19 lr: 0.000042 grad: 0.1022 (0.1089) loss: 0.8212 (0.8131) time: 0.1195 data: 0.0292 max mem: 9377 +Train: [62] [5000/6250] eta: 0:03:04 lr: 0.000042 grad: 0.0968 (0.1088) loss: 0.8175 (0.8131) time: 0.1513 data: 0.0743 max mem: 9377 +Train: [62] [5100/6250] eta: 0:02:49 lr: 0.000042 grad: 0.1093 (0.1088) loss: 0.8096 (0.8131) time: 0.1398 data: 0.0486 max mem: 9377 +Train: [62] [5200/6250] eta: 0:02:34 lr: 0.000042 grad: 0.1001 (0.1088) loss: 0.8136 (0.8131) time: 0.1677 data: 0.0870 max mem: 9377 +Train: [62] [5300/6250] eta: 0:02:20 lr: 0.000042 grad: 0.1008 (0.1089) loss: 0.8027 (0.8131) time: 0.1693 data: 0.0887 max mem: 9377 +Train: [62] [5400/6250] eta: 0:02:05 lr: 0.000041 grad: 0.1038 (0.1089) loss: 0.8106 (0.8130) time: 0.1699 data: 0.0859 max mem: 9377 +Train: [62] [5500/6250] eta: 0:01:51 lr: 0.000041 grad: 0.0994 (0.1088) loss: 0.8138 (0.8130) time: 0.1637 data: 0.0754 max mem: 9377 +Train: [62] [5600/6250] eta: 0:01:36 lr: 0.000041 grad: 0.1113 (0.1088) loss: 0.8146 (0.8130) time: 0.1768 data: 0.0949 max mem: 9377 +Train: [62] [5700/6250] eta: 0:01:21 lr: 0.000041 grad: 0.1073 (0.1088) loss: 0.8129 (0.8131) time: 0.1298 data: 0.0417 max mem: 9377 +Train: [62] [5800/6250] eta: 0:01:06 lr: 0.000041 grad: 0.1026 (0.1088) loss: 0.8156 (0.8131) time: 0.1280 data: 0.0419 max mem: 9377 +Train: [62] [5900/6250] eta: 0:00:51 lr: 0.000041 grad: 0.1030 (0.1088) loss: 0.8185 (0.8131) time: 0.1240 data: 0.0399 max mem: 9377 +Train: [62] [6000/6250] eta: 0:00:37 lr: 0.000041 grad: 0.1040 (0.1088) loss: 0.8089 (0.8131) time: 0.1514 data: 0.0653 max mem: 9377 +Train: [62] [6100/6250] eta: 0:00:22 lr: 0.000041 grad: 0.1073 (0.1089) loss: 0.8163 (0.8130) time: 0.1662 data: 0.0796 max mem: 9377 +Train: [62] [6200/6250] eta: 0:00:07 lr: 0.000041 grad: 0.1057 (0.1089) loss: 0.8140 (0.8130) time: 0.1519 data: 0.0640 max mem: 9377 +Train: [62] [6249/6250] eta: 0:00:00 lr: 0.000041 grad: 0.1008 (0.1089) loss: 0.8182 (0.8131) time: 0.1564 data: 0.0717 max mem: 9377 +Train: [62] Total time: 0:15:38 (0.1501 s / it) +Averaged stats: lr: 0.000041 grad: 0.1008 (0.1089) loss: 0.8182 (0.8131) +Eval (hcp-train-subset): [62] [ 0/62] eta: 0:05:37 loss: 0.8316 (0.8316) time: 5.4362 data: 5.4074 max mem: 9377 +Eval (hcp-train-subset): [62] [61/62] eta: 0:00:00 loss: 0.8252 (0.8258) time: 0.1195 data: 0.0943 max mem: 9377 +Eval (hcp-train-subset): [62] Total time: 0:00:14 (0.2326 s / it) +Averaged stats (hcp-train-subset): loss: 0.8252 (0.8258) +Eval (hcp-val): [62] [ 0/62] eta: 0:04:21 loss: 0.8325 (0.8325) time: 4.2195 data: 4.1406 max mem: 9377 +Eval (hcp-val): [62] [61/62] eta: 0:00:00 loss: 0.8330 (0.8341) time: 0.1422 data: 0.1152 max mem: 9377 +Eval (hcp-val): [62] Total time: 0:00:15 (0.2468 s / it) +Averaged stats (hcp-val): loss: 0.8330 (0.8341) +Eval (nsd-val): [62] [ 0/62] eta: 0:04:58 loss: 0.7978 (0.7978) time: 4.8116 data: 4.7380 max mem: 9377 +Eval (nsd-val): [62] [61/62] eta: 0:00:00 loss: 0.8082 (0.8113) time: 0.1244 data: 0.0971 max mem: 9377 +Eval (nsd-val): [62] Total time: 0:00:14 (0.2363 s / it) +Averaged stats (nsd-val): loss: 0.8082 (0.8113) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +Train: [63] [ 0/6250] eta: 9:04:45 lr: 0.000041 grad: 0.0994 (0.0994) loss: 0.8586 (0.8586) time: 5.2297 data: 4.8620 max mem: 9377 +Train: [63] [ 100/6250] eta: 0:21:46 lr: 0.000041 grad: 0.1034 (0.1121) loss: 0.8278 (0.8375) time: 0.1619 data: 0.0637 max mem: 9377 +Train: [63] [ 200/6250] eta: 0:18:04 lr: 0.000041 grad: 0.1046 (0.1150) loss: 0.8168 (0.8284) time: 0.1533 data: 0.0605 max mem: 9377 +Train: [63] [ 300/6250] eta: 0:16:47 lr: 0.000041 grad: 0.1121 (0.1150) loss: 0.8178 (0.8250) time: 0.1371 data: 0.0436 max mem: 9377 +Train: [63] [ 400/6250] eta: 0:15:45 lr: 0.000041 grad: 0.1084 (0.1165) loss: 0.8179 (0.8224) time: 0.1398 data: 0.0434 max mem: 9377 +Train: [63] [ 500/6250] eta: 0:15:06 lr: 0.000041 grad: 0.1158 (0.1163) loss: 0.8207 (0.8208) time: 0.1535 data: 0.0574 max mem: 9377 +Train: [63] [ 600/6250] eta: 0:14:28 lr: 0.000041 grad: 0.1024 (0.1153) loss: 0.8135 (0.8197) time: 0.1424 data: 0.0589 max mem: 9377 +Train: [63] [ 700/6250] eta: 0:14:06 lr: 0.000041 grad: 0.0979 (0.1138) loss: 0.8265 (0.8193) time: 0.1578 data: 0.0712 max mem: 9377 +Train: [63] [ 800/6250] eta: 0:13:47 lr: 0.000041 grad: 0.1045 (0.1130) loss: 0.8142 (0.8187) time: 0.1428 data: 0.0583 max mem: 9377 +Train: [63] [ 900/6250] eta: 0:13:28 lr: 0.000041 grad: 0.1041 (0.1122) loss: 0.8200 (0.8183) time: 0.1225 data: 0.0363 max mem: 9377 +Train: [63] [1000/6250] eta: 0:13:11 lr: 0.000041 grad: 0.1062 (0.1111) loss: 0.8190 (0.8183) time: 0.1403 data: 0.0546 max mem: 9377 +Train: [63] [1100/6250] eta: 0:12:50 lr: 0.000041 grad: 0.0968 (0.1104) loss: 0.8173 (0.8182) time: 0.1411 data: 0.0639 max mem: 9377 +Train: [63] [1200/6250] eta: 0:12:33 lr: 0.000041 grad: 0.1101 (0.1100) loss: 0.8146 (0.8177) time: 0.1449 data: 0.0612 max mem: 9377 +Train: [63] [1300/6250] eta: 0:12:17 lr: 0.000041 grad: 0.1037 (0.1097) loss: 0.8097 (0.8172) time: 0.1425 data: 0.0547 max mem: 9377 +Train: [63] [1400/6250] eta: 0:12:01 lr: 0.000041 grad: 0.0967 (0.1095) loss: 0.8151 (0.8167) time: 0.1360 data: 0.0561 max mem: 9377 +Train: [63] [1500/6250] eta: 0:11:46 lr: 0.000041 grad: 0.1001 (0.1092) loss: 0.8124 (0.8163) time: 0.1325 data: 0.0506 max mem: 9377 +Train: [63] [1600/6250] eta: 0:11:28 lr: 0.000041 grad: 0.1032 (0.1090) loss: 0.8144 (0.8161) time: 0.1413 data: 0.0604 max mem: 9377 +Train: [63] [1700/6250] eta: 0:11:12 lr: 0.000041 grad: 0.1080 (0.1089) loss: 0.8124 (0.8158) time: 0.1454 data: 0.0622 max mem: 9377 +Train: [63] [1800/6250] eta: 0:10:59 lr: 0.000041 grad: 0.1056 (0.1088) loss: 0.8138 (0.8155) time: 0.1594 data: 0.0752 max mem: 9377 +Train: [63] [1900/6250] eta: 0:10:42 lr: 0.000041 grad: 0.1054 (0.1088) loss: 0.8197 (0.8154) time: 0.1503 data: 0.0700 max mem: 9377 +Train: [63] [2000/6250] eta: 0:10:26 lr: 0.000041 grad: 0.1073 (0.1088) loss: 0.8085 (0.8152) time: 0.1422 data: 0.0530 max mem: 9377 +Train: [63] [2100/6250] eta: 0:10:13 lr: 0.000041 grad: 0.1074 (0.1087) loss: 0.8091 (0.8151) time: 0.1684 data: 0.0811 max mem: 9377 +Train: [63] [2200/6250] eta: 0:09:58 lr: 0.000041 grad: 0.1150 (0.1088) loss: 0.8069 (0.8149) time: 0.1415 data: 0.0635 max mem: 9377 +Train: [63] [2300/6250] eta: 0:09:43 lr: 0.000041 grad: 0.1077 (0.1088) loss: 0.8149 (0.8148) time: 0.1231 data: 0.0415 max mem: 9377 +Train: [63] [2400/6250] eta: 0:09:27 lr: 0.000040 grad: 0.1121 (0.1088) loss: 0.8040 (0.8145) time: 0.1417 data: 0.0595 max mem: 9377 +Train: [63] [2500/6250] eta: 0:09:12 lr: 0.000040 grad: 0.1094 (0.1089) loss: 0.8122 (0.8143) time: 0.1392 data: 0.0607 max mem: 9377 +Train: [63] [2600/6250] eta: 0:08:55 lr: 0.000040 grad: 0.1078 (0.1089) loss: 0.8077 (0.8142) time: 0.1383 data: 0.0547 max mem: 9377 +Train: [63] [2700/6250] eta: 0:08:41 lr: 0.000040 grad: 0.1095 (0.1090) loss: 0.8044 (0.8140) time: 0.1598 data: 0.0805 max mem: 9377 +Train: [63] [2800/6250] eta: 0:08:26 lr: 0.000040 grad: 0.1068 (0.1090) loss: 0.8117 (0.8140) time: 0.1769 data: 0.0911 max mem: 9377 +Train: [63] [2900/6250] eta: 0:08:10 lr: 0.000040 grad: 0.1063 (0.1092) loss: 0.8121 (0.8138) time: 0.1455 data: 0.0649 max mem: 9377 +Train: [63] [3000/6250] eta: 0:07:54 lr: 0.000040 grad: 0.1033 (0.1095) loss: 0.8139 (0.8137) time: 0.1472 data: 0.0659 max mem: 9377 +Train: [63] [3100/6250] eta: 0:07:39 lr: 0.000040 grad: 0.1117 (0.1101) loss: 0.8023 (0.8136) time: 0.1474 data: 0.0677 max mem: 9377 +Train: [63] [3200/6250] eta: 0:07:24 lr: 0.000040 grad: 0.1054 (0.1102) loss: 0.8164 (0.8135) time: 0.1301 data: 0.0474 max mem: 9377 +Train: [63] [3300/6250] eta: 0:07:09 lr: 0.000040 grad: 0.1113 (0.1104) loss: 0.8052 (0.8133) time: 0.1441 data: 0.0599 max mem: 9377 +Train: [63] [3400/6250] eta: 0:06:55 lr: 0.000040 grad: 0.1078 (0.1105) loss: 0.8110 (0.8131) time: 0.1486 data: 0.0684 max mem: 9377 +Train: [63] [3500/6250] eta: 0:06:40 lr: 0.000040 grad: 0.1136 (0.1105) loss: 0.8068 (0.8130) time: 0.1443 data: 0.0592 max mem: 9377 +Train: [63] [3600/6250] eta: 0:06:25 lr: 0.000040 grad: 0.1079 (0.1106) loss: 0.8060 (0.8129) time: 0.1521 data: 0.0733 max mem: 9377 +Train: [63] [3700/6250] eta: 0:06:11 lr: 0.000040 grad: 0.1133 (0.1107) loss: 0.7948 (0.8126) time: 0.1473 data: 0.0647 max mem: 9377 +Train: [63] [3800/6250] eta: 0:05:56 lr: 0.000040 grad: 0.1144 (0.1108) loss: 0.8113 (0.8124) time: 0.1376 data: 0.0549 max mem: 9377 +Train: [63] [3900/6250] eta: 0:05:41 lr: 0.000040 grad: 0.1174 (0.1109) loss: 0.8063 (0.8123) time: 0.1491 data: 0.0667 max mem: 9377 +Train: [63] [4000/6250] eta: 0:05:28 lr: 0.000040 grad: 0.1168 (0.1112) loss: 0.8120 (0.8122) time: 0.2085 data: 0.1286 max mem: 9377 +Train: [63] [4100/6250] eta: 0:05:15 lr: 0.000040 grad: 0.1106 (0.1114) loss: 0.8083 (0.8120) time: 0.1875 data: 0.1031 max mem: 9377 +Train: [63] [4200/6250] eta: 0:05:00 lr: 0.000040 grad: 0.1138 (0.1115) loss: 0.8067 (0.8119) time: 0.1514 data: 0.0708 max mem: 9377 +Train: [63] [4300/6250] eta: 0:04:45 lr: 0.000040 grad: 0.1106 (0.1115) loss: 0.8101 (0.8118) time: 0.1512 data: 0.0706 max mem: 9377 +Train: [63] [4400/6250] eta: 0:04:31 lr: 0.000040 grad: 0.1031 (0.1115) loss: 0.8158 (0.8118) time: 0.1295 data: 0.0418 max mem: 9377 +Train: [63] [4500/6250] eta: 0:04:16 lr: 0.000040 grad: 0.1102 (0.1116) loss: 0.8088 (0.8117) time: 0.1521 data: 0.0704 max mem: 9377 +Train: [63] [4600/6250] eta: 0:04:01 lr: 0.000040 grad: 0.1156 (0.1117) loss: 0.8111 (0.8116) time: 0.1528 data: 0.0680 max mem: 9377 +Train: [63] [4700/6250] eta: 0:03:46 lr: 0.000040 grad: 0.1087 (0.1118) loss: 0.8086 (0.8115) time: 0.1328 data: 0.0470 max mem: 9377 +Train: [63] [4800/6250] eta: 0:03:32 lr: 0.000040 grad: 0.1097 (0.1119) loss: 0.8068 (0.8115) time: 0.1420 data: 0.0587 max mem: 9377 +Train: [63] [4900/6250] eta: 0:03:17 lr: 0.000040 grad: 0.1115 (0.1119) loss: 0.8155 (0.8114) time: 0.1389 data: 0.0513 max mem: 9377 +Train: [63] [5000/6250] eta: 0:03:02 lr: 0.000040 grad: 0.1107 (0.1120) loss: 0.8040 (0.8113) time: 0.1398 data: 0.0505 max mem: 9377 +Train: [63] [5100/6250] eta: 0:02:48 lr: 0.000040 grad: 0.1093 (0.1120) loss: 0.8129 (0.8113) time: 0.1358 data: 0.0519 max mem: 9377 +Train: [63] [5200/6250] eta: 0:02:33 lr: 0.000040 grad: 0.1096 (0.1120) loss: 0.8129 (0.8113) time: 0.1396 data: 0.0480 max mem: 9377 +Train: [63] [5300/6250] eta: 0:02:18 lr: 0.000040 grad: 0.1127 (0.1121) loss: 0.8110 (0.8112) time: 0.1440 data: 0.0559 max mem: 9377 +Train: [63] [5400/6250] eta: 0:02:03 lr: 0.000040 grad: 0.1071 (0.1121) loss: 0.8068 (0.8111) time: 0.1372 data: 0.0532 max mem: 9377 +Train: [63] [5500/6250] eta: 0:01:49 lr: 0.000040 grad: 0.1092 (0.1123) loss: 0.8104 (0.8110) time: 0.1504 data: 0.0702 max mem: 9377 +Train: [63] [5600/6250] eta: 0:01:34 lr: 0.000039 grad: 0.1200 (0.1123) loss: 0.8071 (0.8110) time: 0.1613 data: 0.0871 max mem: 9377 +Train: [63] [5700/6250] eta: 0:01:20 lr: 0.000039 grad: 0.1162 (0.1125) loss: 0.7994 (0.8109) time: 0.1260 data: 0.0351 max mem: 9377 +Train: [63] [5800/6250] eta: 0:01:05 lr: 0.000039 grad: 0.1143 (0.1125) loss: 0.8008 (0.8109) time: 0.1267 data: 0.0420 max mem: 9377 +Train: [63] [5900/6250] eta: 0:00:50 lr: 0.000039 grad: 0.1091 (0.1126) loss: 0.8080 (0.8108) time: 0.1813 data: 0.0987 max mem: 9377 +Train: [63] [6000/6250] eta: 0:00:36 lr: 0.000039 grad: 0.1062 (0.1126) loss: 0.8133 (0.8108) time: 0.1626 data: 0.0678 max mem: 9377 +Train: [63] [6100/6250] eta: 0:00:21 lr: 0.000039 grad: 0.1107 (0.1125) loss: 0.8099 (0.8108) time: 0.1528 data: 0.0694 max mem: 9377 +Train: [63] [6200/6250] eta: 0:00:07 lr: 0.000039 grad: 0.1074 (0.1126) loss: 0.8123 (0.8108) time: 0.1690 data: 0.0840 max mem: 9377 +Train: [63] [6249/6250] eta: 0:00:00 lr: 0.000039 grad: 0.1122 (0.1126) loss: 0.8134 (0.8108) time: 0.1518 data: 0.0768 max mem: 9377 +Train: [63] Total time: 0:15:18 (0.1469 s / it) +Averaged stats: lr: 0.000039 grad: 0.1122 (0.1126) loss: 0.8134 (0.8108) +Eval (hcp-train-subset): [63] [ 0/62] eta: 0:03:30 loss: 0.8324 (0.8324) time: 3.3942 data: 3.3234 max mem: 9377 +Eval (hcp-train-subset): [63] [61/62] eta: 0:00:00 loss: 0.8233 (0.8258) time: 0.1153 data: 0.0886 max mem: 9377 +Eval (hcp-train-subset): [63] Total time: 0:00:13 (0.2103 s / it) +Averaged stats (hcp-train-subset): loss: 0.8233 (0.8258) +Eval (hcp-val): [63] [ 0/62] eta: 0:05:29 loss: 0.8283 (0.8283) time: 5.3071 data: 5.2777 max mem: 9377 +Eval (hcp-val): [63] [61/62] eta: 0:00:00 loss: 0.8328 (0.8341) time: 0.1144 data: 0.0895 max mem: 9377 +Eval (hcp-val): [63] Total time: 0:00:13 (0.2103 s / it) +Averaged stats (hcp-val): loss: 0.8328 (0.8341) +Eval (nsd-val): [63] [ 0/62] eta: 0:03:54 loss: 0.8020 (0.8020) time: 3.7796 data: 3.7098 max mem: 9377 +Eval (nsd-val): [63] [61/62] eta: 0:00:00 loss: 0.8097 (0.8118) time: 0.1110 data: 0.0840 max mem: 9377 +Eval (nsd-val): [63] Total time: 0:00:12 (0.2066 s / it) +Averaged stats (nsd-val): loss: 0.8097 (0.8118) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +Train: [64] [ 0/6250] eta: 7:58:39 lr: 0.000039 grad: 0.0657 (0.0657) loss: 0.8607 (0.8607) time: 4.5951 data: 4.3244 max mem: 9377 +Train: [64] [ 100/6250] eta: 0:19:21 lr: 0.000039 grad: 0.1029 (0.1312) loss: 0.8172 (0.8332) time: 0.1379 data: 0.0252 max mem: 9377 +Train: [64] [ 200/6250] eta: 0:17:17 lr: 0.000039 grad: 0.1029 (0.1236) loss: 0.8148 (0.8254) time: 0.1383 data: 0.0355 max mem: 9377 +Train: [64] [ 300/6250] eta: 0:16:27 lr: 0.000039 grad: 0.1067 (0.1216) loss: 0.8219 (0.8221) time: 0.1552 data: 0.0512 max mem: 9377 +Train: [64] [ 400/6250] eta: 0:16:01 lr: 0.000039 grad: 0.1110 (0.1216) loss: 0.8044 (0.8186) time: 0.1718 data: 0.0807 max mem: 9377 +Train: [64] [ 500/6250] eta: 0:15:30 lr: 0.000039 grad: 0.1080 (0.1202) loss: 0.8097 (0.8169) time: 0.1447 data: 0.0443 max mem: 9377 +Train: [64] [ 600/6250] eta: 0:14:59 lr: 0.000039 grad: 0.1084 (0.1204) loss: 0.8186 (0.8154) time: 0.1527 data: 0.0576 max mem: 9377 +Train: [64] [ 700/6250] eta: 0:14:30 lr: 0.000039 grad: 0.1126 (0.1202) loss: 0.8030 (0.8147) time: 0.1254 data: 0.0312 max mem: 9377 +Train: [64] [ 800/6250] eta: 0:14:17 lr: 0.000039 grad: 0.1088 (0.1190) loss: 0.8159 (0.8145) time: 0.1494 data: 0.0651 max mem: 9377 +Train: [64] [ 900/6250] eta: 0:14:12 lr: 0.000039 grad: 0.1112 (0.1184) loss: 0.8138 (0.8145) time: 0.1711 data: 0.0881 max mem: 9377 +Train: [64] [1000/6250] eta: 0:14:03 lr: 0.000039 grad: 0.1025 (0.1179) loss: 0.8119 (0.8143) time: 0.2016 data: 0.1229 max mem: 9377 +Train: [64] [1100/6250] eta: 0:13:52 lr: 0.000039 grad: 0.1021 (0.1172) loss: 0.8132 (0.8140) time: 0.1726 data: 0.0991 max mem: 9377 +Train: [64] [1200/6250] eta: 0:13:40 lr: 0.000039 grad: 0.1100 (0.1170) loss: 0.8106 (0.8137) time: 0.1788 data: 0.0923 max mem: 9377 +Train: [64] [1300/6250] eta: 0:13:29 lr: 0.000039 grad: 0.1133 (0.1170) loss: 0.8115 (0.8135) time: 0.1621 data: 0.0768 max mem: 9377 +Train: [64] [1400/6250] eta: 0:13:15 lr: 0.000039 grad: 0.1108 (0.1168) loss: 0.8114 (0.8133) time: 0.1626 data: 0.0845 max mem: 9377 +Train: [64] [1500/6250] eta: 0:12:59 lr: 0.000039 grad: 0.1022 (0.1162) loss: 0.8146 (0.8133) time: 0.1812 data: 0.0985 max mem: 9377 +Train: [64] [1600/6250] eta: 0:12:40 lr: 0.000039 grad: 0.1049 (0.1158) loss: 0.8100 (0.8133) time: 0.1662 data: 0.0864 max mem: 9377 +Train: [64] [1700/6250] eta: 0:12:19 lr: 0.000039 grad: 0.0984 (0.1154) loss: 0.8134 (0.8133) time: 0.1424 data: 0.0568 max mem: 9377 +Train: [64] [1800/6250] eta: 0:11:59 lr: 0.000039 grad: 0.1033 (0.1150) loss: 0.8174 (0.8133) time: 0.1397 data: 0.0609 max mem: 9377 +Train: [64] [1900/6250] eta: 0:11:40 lr: 0.000039 grad: 0.1084 (0.1148) loss: 0.8111 (0.8131) time: 0.1438 data: 0.0587 max mem: 9377 +Train: [64] [2000/6250] eta: 0:11:19 lr: 0.000039 grad: 0.1086 (0.1146) loss: 0.8124 (0.8131) time: 0.1455 data: 0.0545 max mem: 9377 +Train: [64] [2100/6250] eta: 0:11:02 lr: 0.000039 grad: 0.1053 (0.1144) loss: 0.8119 (0.8130) time: 0.1529 data: 0.0636 max mem: 9377 +Train: [64] [2200/6250] eta: 0:10:44 lr: 0.000039 grad: 0.1124 (0.1143) loss: 0.8083 (0.8129) time: 0.1589 data: 0.0800 max mem: 9377 +Train: [64] [2300/6250] eta: 0:10:26 lr: 0.000039 grad: 0.1139 (0.1144) loss: 0.8094 (0.8128) time: 0.1469 data: 0.0651 max mem: 9377 +Train: [64] [2400/6250] eta: 0:10:09 lr: 0.000039 grad: 0.1116 (0.1144) loss: 0.8149 (0.8127) time: 0.1453 data: 0.0637 max mem: 9377 +Train: [64] [2500/6250] eta: 0:09:51 lr: 0.000039 grad: 0.1110 (0.1145) loss: 0.8106 (0.8126) time: 0.1455 data: 0.0595 max mem: 9377 +Train: [64] [2600/6250] eta: 0:09:34 lr: 0.000039 grad: 0.1081 (0.1147) loss: 0.8121 (0.8125) time: 0.1668 data: 0.0883 max mem: 9377 +Train: [64] [2700/6250] eta: 0:09:17 lr: 0.000038 grad: 0.1125 (0.1149) loss: 0.8085 (0.8123) time: 0.1408 data: 0.0587 max mem: 9377 +Train: [64] [2800/6250] eta: 0:08:59 lr: 0.000038 grad: 0.1078 (0.1149) loss: 0.8059 (0.8121) time: 0.1273 data: 0.0427 max mem: 9377 +Train: [64] [2900/6250] eta: 0:08:42 lr: 0.000038 grad: 0.1092 (0.1149) loss: 0.8142 (0.8119) time: 0.1265 data: 0.0400 max mem: 9377 +Train: [64] [3000/6250] eta: 0:08:25 lr: 0.000038 grad: 0.1048 (0.1149) loss: 0.8099 (0.8118) time: 0.1224 data: 0.0445 max mem: 9377 +Train: [64] [3100/6250] eta: 0:08:07 lr: 0.000038 grad: 0.1045 (0.1149) loss: 0.8153 (0.8118) time: 0.1291 data: 0.0465 max mem: 9377 +Train: [64] [3200/6250] eta: 0:07:51 lr: 0.000038 grad: 0.1088 (0.1150) loss: 0.8105 (0.8117) time: 0.1285 data: 0.0456 max mem: 9377 +Train: [64] [3300/6250] eta: 0:07:34 lr: 0.000038 grad: 0.1069 (0.1149) loss: 0.8138 (0.8117) time: 0.1466 data: 0.0600 max mem: 9377 +Train: [64] [3400/6250] eta: 0:07:17 lr: 0.000038 grad: 0.1122 (0.1149) loss: 0.8126 (0.8117) time: 0.1250 data: 0.0391 max mem: 9377 +Train: [64] [3500/6250] eta: 0:07:01 lr: 0.000038 grad: 0.1149 (0.1149) loss: 0.8166 (0.8117) time: 0.1428 data: 0.0574 max mem: 9377 +Train: [64] [3600/6250] eta: 0:06:44 lr: 0.000038 grad: 0.1101 (0.1148) loss: 0.8110 (0.8117) time: 0.1370 data: 0.0554 max mem: 9377 +Train: [64] [3700/6250] eta: 0:06:30 lr: 0.000038 grad: 0.1081 (0.1148) loss: 0.8161 (0.8117) time: 0.1650 data: 0.0726 max mem: 9377 +Train: [64] [3800/6250] eta: 0:06:15 lr: 0.000038 grad: 0.1100 (0.1147) loss: 0.8084 (0.8117) time: 0.1471 data: 0.0590 max mem: 9377 +Train: [64] [3900/6250] eta: 0:06:00 lr: 0.000038 grad: 0.1086 (0.1147) loss: 0.8097 (0.8117) time: 0.1651 data: 0.0826 max mem: 9377 +Train: [64] [4000/6250] eta: 0:05:44 lr: 0.000038 grad: 0.1144 (0.1146) loss: 0.8191 (0.8117) time: 0.1656 data: 0.0768 max mem: 9377 +Train: [64] [4100/6250] eta: 0:05:28 lr: 0.000038 grad: 0.1028 (0.1145) loss: 0.8186 (0.8117) time: 0.1260 data: 0.0410 max mem: 9377 +Train: [64] [4200/6250] eta: 0:05:13 lr: 0.000038 grad: 0.1112 (0.1145) loss: 0.8085 (0.8117) time: 0.1354 data: 0.0531 max mem: 9377 +Train: [64] [4300/6250] eta: 0:04:57 lr: 0.000038 grad: 0.1108 (0.1144) loss: 0.8073 (0.8117) time: 0.1188 data: 0.0417 max mem: 9377 +Train: [64] [4400/6250] eta: 0:04:41 lr: 0.000038 grad: 0.1087 (0.1144) loss: 0.8122 (0.8116) time: 0.1512 data: 0.0626 max mem: 9377 +Train: [64] [4500/6250] eta: 0:04:26 lr: 0.000038 grad: 0.1085 (0.1144) loss: 0.8087 (0.8116) time: 0.1441 data: 0.0561 max mem: 9377 +Train: [64] [4600/6250] eta: 0:04:10 lr: 0.000038 grad: 0.1115 (0.1144) loss: 0.8111 (0.8116) time: 0.1442 data: 0.0569 max mem: 9377 +Train: [64] [4700/6250] eta: 0:03:54 lr: 0.000038 grad: 0.1069 (0.1143) loss: 0.8102 (0.8116) time: 0.1426 data: 0.0591 max mem: 9377 +Train: [64] [4800/6250] eta: 0:03:39 lr: 0.000038 grad: 0.1087 (0.1142) loss: 0.8097 (0.8116) time: 0.1416 data: 0.0616 max mem: 9377 +Train: [64] [4900/6250] eta: 0:03:24 lr: 0.000038 grad: 0.1081 (0.1141) loss: 0.8124 (0.8116) time: 0.1854 data: 0.1101 max mem: 9377 +Train: [64] [5000/6250] eta: 0:03:09 lr: 0.000038 grad: 0.1103 (0.1140) loss: 0.8080 (0.8116) time: 0.1570 data: 0.0755 max mem: 9377 +Train: [64] [5100/6250] eta: 0:02:54 lr: 0.000038 grad: 0.1044 (0.1139) loss: 0.8157 (0.8116) time: 0.1671 data: 0.0909 max mem: 9377 +Train: [64] [5200/6250] eta: 0:02:39 lr: 0.000038 grad: 0.1079 (0.1139) loss: 0.8080 (0.8116) time: 0.1635 data: 0.0758 max mem: 9377 +Train: [64] [5300/6250] eta: 0:02:24 lr: 0.000038 grad: 0.1046 (0.1138) loss: 0.8141 (0.8117) time: 0.1370 data: 0.0534 max mem: 9377 +Train: [64] [5400/6250] eta: 0:02:09 lr: 0.000038 grad: 0.1103 (0.1138) loss: 0.8093 (0.8117) time: 0.1298 data: 0.0434 max mem: 9377 +Train: [64] [5500/6250] eta: 0:01:53 lr: 0.000038 grad: 0.1102 (0.1137) loss: 0.8168 (0.8118) time: 0.1366 data: 0.0518 max mem: 9377 +Train: [64] [5600/6250] eta: 0:01:39 lr: 0.000038 grad: 0.1055 (0.1137) loss: 0.8138 (0.8118) time: 0.1992 data: 0.1082 max mem: 9377 +Train: [64] [5700/6250] eta: 0:01:23 lr: 0.000038 grad: 0.1109 (0.1137) loss: 0.8124 (0.8118) time: 0.1670 data: 0.0881 max mem: 9377 +Train: [64] [5800/6250] eta: 0:01:08 lr: 0.000038 grad: 0.1071 (0.1136) loss: 0.8071 (0.8118) time: 0.1715 data: 0.0961 max mem: 9377 +Train: [64] [5900/6250] eta: 0:00:53 lr: 0.000037 grad: 0.1129 (0.1136) loss: 0.8144 (0.8118) time: 0.1740 data: 0.0883 max mem: 9377 +Train: [64] [6000/6250] eta: 0:00:38 lr: 0.000037 grad: 0.1076 (0.1137) loss: 0.8129 (0.8118) time: 0.1523 data: 0.0628 max mem: 9377 +Train: [64] [6100/6250] eta: 0:00:22 lr: 0.000037 grad: 0.1222 (0.1138) loss: 0.7968 (0.8117) time: 0.1498 data: 0.0662 max mem: 9377 +Train: [64] [6200/6250] eta: 0:00:07 lr: 0.000037 grad: 0.1170 (0.1139) loss: 0.7989 (0.8117) time: 0.1418 data: 0.0624 max mem: 9377 +Train: [64] [6249/6250] eta: 0:00:00 lr: 0.000037 grad: 0.1201 (0.1139) loss: 0.8115 (0.8116) time: 0.1448 data: 0.0583 max mem: 9377 +Train: [64] Total time: 0:16:01 (0.1538 s / it) +Averaged stats: lr: 0.000037 grad: 0.1201 (0.1139) loss: 0.8115 (0.8116) +Eval (hcp-train-subset): [64] [ 0/62] eta: 0:05:02 loss: 0.8271 (0.8271) time: 4.8747 data: 4.8403 max mem: 9377 +Eval (hcp-train-subset): [64] [61/62] eta: 0:00:00 loss: 0.8213 (0.8241) time: 0.1070 data: 0.0799 max mem: 9377 +Eval (hcp-train-subset): [64] Total time: 0:00:13 (0.2112 s / it) +Averaged stats (hcp-train-subset): loss: 0.8213 (0.8241) +Making plots (hcp-train-subset): example=18 +Eval (hcp-val): [64] [ 0/62] eta: 0:04:27 loss: 0.8286 (0.8286) time: 4.3095 data: 4.2222 max mem: 9377 +Eval (hcp-val): [64] [61/62] eta: 0:00:00 loss: 0.8321 (0.8332) time: 0.1303 data: 0.1051 max mem: 9377 +Eval (hcp-val): [64] Total time: 0:00:12 (0.2064 s / it) +Averaged stats (hcp-val): loss: 0.8321 (0.8332) +Making plots (hcp-val): example=42 +Eval (nsd-val): [64] [ 0/62] eta: 0:04:44 loss: 0.8028 (0.8028) time: 4.5888 data: 4.5347 max mem: 9377 +Eval (nsd-val): [64] [61/62] eta: 0:00:00 loss: 0.8089 (0.8105) time: 0.1126 data: 0.0853 max mem: 9377 +Eval (nsd-val): [64] Total time: 0:00:12 (0.2095 s / it) +Averaged stats (nsd-val): loss: 0.8089 (0.8105) +Making plots (nsd-val): example=47 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-00064.pth +Train: [65] [ 0/6250] eta: 9:54:15 lr: 0.000037 grad: 0.0758 (0.0758) loss: 0.8864 (0.8864) time: 5.7049 data: 5.5571 max mem: 9377 +Train: [65] [ 100/6250] eta: 0:19:50 lr: 0.000037 grad: 0.1016 (0.1236) loss: 0.8341 (0.8350) time: 0.1472 data: 0.0527 max mem: 9377 +Train: [65] [ 200/6250] eta: 0:16:37 lr: 0.000037 grad: 0.1150 (0.1194) loss: 0.8242 (0.8296) time: 0.1274 data: 0.0358 max mem: 9377 +Train: [65] [ 300/6250] eta: 0:15:31 lr: 0.000037 grad: 0.1050 (0.1173) loss: 0.8188 (0.8246) time: 0.1293 data: 0.0325 max mem: 9377 +Train: [65] [ 400/6250] eta: 0:14:48 lr: 0.000037 grad: 0.1033 (0.1165) loss: 0.8202 (0.8216) time: 0.1471 data: 0.0573 max mem: 9377 +Train: [65] [ 500/6250] eta: 0:14:27 lr: 0.000037 grad: 0.1092 (0.1156) loss: 0.8118 (0.8201) time: 0.1291 data: 0.0370 max mem: 9377 +Train: [65] [ 600/6250] eta: 0:14:09 lr: 0.000037 grad: 0.1075 (0.1148) loss: 0.8152 (0.8192) time: 0.1540 data: 0.0740 max mem: 9377 +Train: [65] [ 700/6250] eta: 0:13:56 lr: 0.000037 grad: 0.1008 (0.1143) loss: 0.8102 (0.8186) time: 0.1652 data: 0.0771 max mem: 9377 +Train: [65] [ 800/6250] eta: 0:13:38 lr: 0.000037 grad: 0.1101 (0.1137) loss: 0.8113 (0.8180) time: 0.1478 data: 0.0558 max mem: 9377 +Train: [65] [ 900/6250] eta: 0:13:22 lr: 0.000037 grad: 0.1041 (0.1134) loss: 0.8113 (0.8174) time: 0.1322 data: 0.0454 max mem: 9377 +Train: [65] [1000/6250] eta: 0:13:05 lr: 0.000037 grad: 0.0988 (0.1128) loss: 0.8206 (0.8172) time: 0.1407 data: 0.0542 max mem: 9377 +Train: [65] [1100/6250] eta: 0:12:51 lr: 0.000037 grad: 0.1092 (0.1127) loss: 0.8172 (0.8169) time: 0.1449 data: 0.0641 max mem: 9377 +Train: [65] [1200/6250] eta: 0:12:37 lr: 0.000037 grad: 0.1096 (0.1129) loss: 0.8154 (0.8164) time: 0.1551 data: 0.0748 max mem: 9377 +Train: [65] [1300/6250] eta: 0:12:25 lr: 0.000037 grad: 0.1048 (0.1129) loss: 0.8200 (0.8162) time: 0.1469 data: 0.0679 max mem: 9377 +Train: [65] [1400/6250] eta: 0:12:07 lr: 0.000037 grad: 0.1068 (0.1128) loss: 0.8156 (0.8159) time: 0.1381 data: 0.0542 max mem: 9377 +Train: [65] [1500/6250] eta: 0:11:52 lr: 0.000037 grad: 0.1096 (0.1128) loss: 0.8069 (0.8155) time: 0.1202 data: 0.0346 max mem: 9377 +Train: [65] [1600/6250] eta: 0:11:37 lr: 0.000037 grad: 0.1084 (0.1130) loss: 0.8140 (0.8151) time: 0.1402 data: 0.0527 max mem: 9377 +Train: [65] [1700/6250] eta: 0:11:23 lr: 0.000037 grad: 0.1139 (0.1131) loss: 0.8001 (0.8145) time: 0.1442 data: 0.0620 max mem: 9377 +Train: [65] [1800/6250] eta: 0:11:08 lr: 0.000037 grad: 0.1060 (0.1131) loss: 0.8152 (0.8141) time: 0.1425 data: 0.0563 max mem: 9377 +Train: [65] [1900/6250] eta: 0:10:55 lr: 0.000037 grad: 0.1138 (0.1132) loss: 0.8084 (0.8138) time: 0.1682 data: 0.0874 max mem: 9377 +Train: [65] [2000/6250] eta: 0:10:40 lr: 0.000037 grad: 0.1124 (0.1134) loss: 0.8092 (0.8135) time: 0.1241 data: 0.0469 max mem: 9377 +Train: [65] [2100/6250] eta: 0:10:24 lr: 0.000037 grad: 0.1024 (0.1135) loss: 0.8144 (0.8133) time: 0.1498 data: 0.0747 max mem: 9377 +Train: [65] [2200/6250] eta: 0:10:09 lr: 0.000037 grad: 0.1102 (0.1136) loss: 0.8065 (0.8131) time: 0.1402 data: 0.0576 max mem: 9377 +Train: [65] [2300/6250] eta: 0:09:54 lr: 0.000037 grad: 0.1118 (0.1138) loss: 0.8114 (0.8129) time: 0.1320 data: 0.0452 max mem: 9377 +Train: [65] [2400/6250] eta: 0:09:37 lr: 0.000037 grad: 0.1121 (0.1140) loss: 0.8094 (0.8127) time: 0.1336 data: 0.0564 max mem: 9377 +Train: [65] [2500/6250] eta: 0:09:21 lr: 0.000037 grad: 0.1166 (0.1141) loss: 0.8076 (0.8125) time: 0.1315 data: 0.0482 max mem: 9377 +Train: [65] [2600/6250] eta: 0:09:05 lr: 0.000037 grad: 0.1162 (0.1140) loss: 0.8078 (0.8124) time: 0.1447 data: 0.0578 max mem: 9377 +Train: [65] [2700/6250] eta: 0:08:50 lr: 0.000037 grad: 0.1192 (0.1141) loss: 0.8082 (0.8123) time: 0.1514 data: 0.0684 max mem: 9377 +Train: [65] [2800/6250] eta: 0:08:34 lr: 0.000037 grad: 0.1118 (0.1141) loss: 0.8059 (0.8122) time: 0.1486 data: 0.0717 max mem: 9377 +Train: [65] [2900/6250] eta: 0:08:19 lr: 0.000037 grad: 0.1093 (0.1143) loss: 0.8089 (0.8120) time: 0.1375 data: 0.0574 max mem: 9377 +Train: [65] [3000/6250] eta: 0:08:03 lr: 0.000036 grad: 0.1158 (0.1145) loss: 0.8078 (0.8118) time: 0.1488 data: 0.0722 max mem: 9377 +Train: [65] [3100/6250] eta: 0:07:49 lr: 0.000036 grad: 0.1150 (0.1147) loss: 0.8064 (0.8116) time: 0.1604 data: 0.0823 max mem: 9377 +Train: [65] [3200/6250] eta: 0:07:35 lr: 0.000036 grad: 0.1133 (0.1147) loss: 0.8089 (0.8115) time: 0.1862 data: 0.1056 max mem: 9377 +Train: [65] [3300/6250] eta: 0:07:22 lr: 0.000036 grad: 0.1142 (0.1147) loss: 0.8058 (0.8114) time: 0.1694 data: 0.0798 max mem: 9377 +Train: [65] [3400/6250] eta: 0:07:07 lr: 0.000036 grad: 0.1102 (0.1147) loss: 0.8177 (0.8114) time: 0.1600 data: 0.0720 max mem: 9377 +Train: [65] [3500/6250] eta: 0:06:53 lr: 0.000036 grad: 0.1041 (0.1148) loss: 0.8151 (0.8114) time: 0.1542 data: 0.0689 max mem: 9377 +Train: [65] [3600/6250] eta: 0:06:38 lr: 0.000036 grad: 0.1118 (0.1148) loss: 0.8098 (0.8113) time: 0.1834 data: 0.1042 max mem: 9377 +Train: [65] [3700/6250] eta: 0:06:23 lr: 0.000036 grad: 0.1065 (0.1148) loss: 0.8136 (0.8113) time: 0.1563 data: 0.0737 max mem: 9377 +Train: [65] [3800/6250] eta: 0:06:08 lr: 0.000036 grad: 0.1052 (0.1147) loss: 0.8090 (0.8114) time: 0.1502 data: 0.0698 max mem: 9377 +Train: [65] [3900/6250] eta: 0:05:53 lr: 0.000036 grad: 0.1168 (0.1147) loss: 0.8034 (0.8114) time: 0.1377 data: 0.0574 max mem: 9377 +Train: [65] [4000/6250] eta: 0:05:38 lr: 0.000036 grad: 0.1076 (0.1146) loss: 0.8155 (0.8113) time: 0.1529 data: 0.0744 max mem: 9377 +Train: [65] [4100/6250] eta: 0:05:22 lr: 0.000036 grad: 0.1064 (0.1146) loss: 0.8136 (0.8113) time: 0.1564 data: 0.0772 max mem: 9377 +Train: [65] [4200/6250] eta: 0:05:06 lr: 0.000036 grad: 0.1123 (0.1145) loss: 0.8076 (0.8113) time: 0.1463 data: 0.0629 max mem: 9377 +Train: [65] [4300/6250] eta: 0:04:51 lr: 0.000036 grad: 0.1122 (0.1145) loss: 0.8088 (0.8113) time: 0.1364 data: 0.0598 max mem: 9377 +Train: [65] [4400/6250] eta: 0:04:36 lr: 0.000036 grad: 0.1125 (0.1145) loss: 0.8143 (0.8113) time: 0.1525 data: 0.0713 max mem: 9377 +Train: [65] [4500/6250] eta: 0:04:21 lr: 0.000036 grad: 0.1003 (0.1144) loss: 0.8144 (0.8113) time: 0.1567 data: 0.0739 max mem: 9377 +Train: [65] [4600/6250] eta: 0:04:07 lr: 0.000036 grad: 0.1073 (0.1144) loss: 0.8097 (0.8114) time: 0.1668 data: 0.0912 max mem: 9377 +Train: [65] [4700/6250] eta: 0:03:53 lr: 0.000036 grad: 0.1087 (0.1144) loss: 0.8126 (0.8114) time: 0.1887 data: 0.1063 max mem: 9377 +Train: [65] [4800/6250] eta: 0:03:37 lr: 0.000036 grad: 0.1130 (0.1144) loss: 0.8077 (0.8114) time: 0.1541 data: 0.0699 max mem: 9377 +Train: [65] [4900/6250] eta: 0:03:23 lr: 0.000036 grad: 0.1164 (0.1144) loss: 0.8069 (0.8114) time: 0.1471 data: 0.0619 max mem: 9377 +Train: [65] [5000/6250] eta: 0:03:08 lr: 0.000036 grad: 0.1106 (0.1145) loss: 0.8083 (0.8113) time: 0.1682 data: 0.0870 max mem: 9377 +Train: [65] [5100/6250] eta: 0:02:53 lr: 0.000036 grad: 0.1073 (0.1145) loss: 0.8142 (0.8112) time: 0.1744 data: 0.0958 max mem: 9377 +Train: [65] [5200/6250] eta: 0:02:38 lr: 0.000036 grad: 0.1075 (0.1145) loss: 0.8082 (0.8112) time: 0.1432 data: 0.0571 max mem: 9377 +Train: [65] [5300/6250] eta: 0:02:23 lr: 0.000036 grad: 0.1154 (0.1145) loss: 0.8123 (0.8111) time: 0.1593 data: 0.0865 max mem: 9377 +Train: [65] [5400/6250] eta: 0:02:08 lr: 0.000036 grad: 0.1190 (0.1146) loss: 0.8066 (0.8111) time: 0.1714 data: 0.0880 max mem: 9377 +Train: [65] [5500/6250] eta: 0:01:53 lr: 0.000036 grad: 0.1074 (0.1146) loss: 0.8116 (0.8111) time: 0.1587 data: 0.0713 max mem: 9377 +Train: [65] [5600/6250] eta: 0:01:38 lr: 0.000036 grad: 0.1120 (0.1146) loss: 0.8113 (0.8111) time: 0.1418 data: 0.0583 max mem: 9377 +Train: [65] [5700/6250] eta: 0:01:23 lr: 0.000036 grad: 0.1041 (0.1147) loss: 0.8192 (0.8112) time: 0.1422 data: 0.0517 max mem: 9377 +Train: [65] [5800/6250] eta: 0:01:08 lr: 0.000036 grad: 0.1110 (0.1147) loss: 0.8086 (0.8111) time: 0.1333 data: 0.0298 max mem: 9377 +Train: [65] [5900/6250] eta: 0:00:52 lr: 0.000036 grad: 0.1189 (0.1147) loss: 0.8082 (0.8111) time: 0.1323 data: 0.0455 max mem: 9377 +Train: [65] [6000/6250] eta: 0:00:37 lr: 0.000036 grad: 0.1169 (0.1148) loss: 0.8062 (0.8110) time: 0.1441 data: 0.0597 max mem: 9377 +Train: [65] [6100/6250] eta: 0:00:22 lr: 0.000036 grad: 0.1189 (0.1148) loss: 0.8081 (0.8110) time: 0.1279 data: 0.0317 max mem: 9377 +Train: [65] [6200/6250] eta: 0:00:07 lr: 0.000036 grad: 0.1143 (0.1149) loss: 0.8077 (0.8110) time: 0.1448 data: 0.0615 max mem: 9377 +Train: [65] [6249/6250] eta: 0:00:00 lr: 0.000036 grad: 0.1148 (0.1149) loss: 0.8045 (0.8109) time: 0.1360 data: 0.0479 max mem: 9377 +Train: [65] Total time: 0:15:46 (0.1514 s / it) +Averaged stats: lr: 0.000036 grad: 0.1148 (0.1149) loss: 0.8045 (0.8109) +Eval (hcp-train-subset): [65] [ 0/62] eta: 0:05:29 loss: 0.8240 (0.8240) time: 5.3093 data: 5.2755 max mem: 9377 +Eval (hcp-train-subset): [65] [61/62] eta: 0:00:00 loss: 0.8252 (0.8249) time: 0.1014 data: 0.0762 max mem: 9377 +Eval (hcp-train-subset): [65] Total time: 0:00:15 (0.2494 s / it) +Averaged stats (hcp-train-subset): loss: 0.8252 (0.8249) +Eval (hcp-val): [65] [ 0/62] eta: 0:06:41 loss: 0.8324 (0.8324) time: 6.4708 data: 6.4397 max mem: 9377 +Eval (hcp-val): [65] [61/62] eta: 0:00:00 loss: 0.8319 (0.8331) time: 0.1461 data: 0.1208 max mem: 9377 +Eval (hcp-val): [65] Total time: 0:00:15 (0.2428 s / it) +Averaged stats (hcp-val): loss: 0.8319 (0.8331) +Eval (nsd-val): [65] [ 0/62] eta: 0:04:15 loss: 0.7988 (0.7988) time: 4.1159 data: 4.0255 max mem: 9377 +Eval (nsd-val): [65] [61/62] eta: 0:00:00 loss: 0.8099 (0.8104) time: 0.1287 data: 0.1032 max mem: 9377 +Eval (nsd-val): [65] Total time: 0:00:14 (0.2402 s / it) +Averaged stats (nsd-val): loss: 0.8099 (0.8104) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +Train: [66] [ 0/6250] eta: 8:30:42 lr: 0.000036 grad: 0.2057 (0.2057) loss: 0.8396 (0.8396) time: 4.9028 data: 4.5492 max mem: 9377 +Train: [66] [ 100/6250] eta: 0:20:39 lr: 0.000035 grad: 0.1263 (0.1525) loss: 0.8205 (0.8088) time: 0.1511 data: 0.0488 max mem: 9377 +Train: [66] [ 200/6250] eta: 0:17:45 lr: 0.000035 grad: 0.1212 (0.1407) loss: 0.8169 (0.8092) time: 0.1457 data: 0.0415 max mem: 9377 +Train: [66] [ 300/6250] eta: 0:16:36 lr: 0.000035 grad: 0.1186 (0.1374) loss: 0.8116 (0.8091) time: 0.1391 data: 0.0446 max mem: 9377 +Train: [66] [ 400/6250] eta: 0:15:38 lr: 0.000035 grad: 0.1180 (0.1332) loss: 0.8061 (0.8095) time: 0.1457 data: 0.0451 max mem: 9377 +Train: [66] [ 500/6250] eta: 0:15:05 lr: 0.000035 grad: 0.1182 (0.1298) loss: 0.8168 (0.8101) time: 0.1573 data: 0.0696 max mem: 9377 +Train: [66] [ 600/6250] eta: 0:14:29 lr: 0.000035 grad: 0.1100 (0.1275) loss: 0.8156 (0.8108) time: 0.1363 data: 0.0419 max mem: 9377 +Train: [66] [ 700/6250] eta: 0:14:07 lr: 0.000035 grad: 0.1065 (0.1254) loss: 0.8143 (0.8113) time: 0.1417 data: 0.0469 max mem: 9377 +Train: [66] [ 800/6250] eta: 0:13:51 lr: 0.000035 grad: 0.1094 (0.1243) loss: 0.8188 (0.8117) time: 0.1665 data: 0.0766 max mem: 9377 +Train: [66] [ 900/6250] eta: 0:13:32 lr: 0.000035 grad: 0.1135 (0.1234) loss: 0.8111 (0.8120) time: 0.1685 data: 0.0827 max mem: 9377 +Train: [66] [1000/6250] eta: 0:13:12 lr: 0.000035 grad: 0.1147 (0.1228) loss: 0.8132 (0.8120) time: 0.1424 data: 0.0581 max mem: 9377 +Train: [66] [1100/6250] eta: 0:12:55 lr: 0.000035 grad: 0.1060 (0.1220) loss: 0.8146 (0.8120) time: 0.1434 data: 0.0584 max mem: 9377 +Train: [66] [1200/6250] eta: 0:12:37 lr: 0.000035 grad: 0.1045 (0.1212) loss: 0.8129 (0.8119) time: 0.1394 data: 0.0570 max mem: 9377 +Train: [66] [1300/6250] eta: 0:12:20 lr: 0.000035 grad: 0.1190 (0.1207) loss: 0.8050 (0.8117) time: 0.1402 data: 0.0576 max mem: 9377 +Train: [66] [1400/6250] eta: 0:12:02 lr: 0.000035 grad: 0.1187 (0.1202) loss: 0.8044 (0.8115) time: 0.1372 data: 0.0514 max mem: 9377 +Train: [66] [1500/6250] eta: 0:11:45 lr: 0.000035 grad: 0.1128 (0.1201) loss: 0.8048 (0.8114) time: 0.1387 data: 0.0544 max mem: 9377 +Train: [66] [1600/6250] eta: 0:11:30 lr: 0.000035 grad: 0.1021 (0.1197) loss: 0.8174 (0.8113) time: 0.1586 data: 0.0680 max mem: 9377 +Train: [66] [1700/6250] eta: 0:11:13 lr: 0.000035 grad: 0.1099 (0.1194) loss: 0.8101 (0.8112) time: 0.1169 data: 0.0350 max mem: 9377 +Train: [66] [1800/6250] eta: 0:10:57 lr: 0.000035 grad: 0.1128 (0.1193) loss: 0.8073 (0.8111) time: 0.1642 data: 0.0819 max mem: 9377 +Train: [66] [1900/6250] eta: 0:10:39 lr: 0.000035 grad: 0.1235 (0.1192) loss: 0.8064 (0.8110) time: 0.1237 data: 0.0304 max mem: 9377 +Train: [66] [2000/6250] eta: 0:10:23 lr: 0.000035 grad: 0.1166 (0.1190) loss: 0.8039 (0.8109) time: 0.1413 data: 0.0624 max mem: 9377 +Train: [66] [2100/6250] eta: 0:10:05 lr: 0.000035 grad: 0.1130 (0.1189) loss: 0.8052 (0.8108) time: 0.1361 data: 0.0550 max mem: 9377 +Train: [66] [2200/6250] eta: 0:09:49 lr: 0.000035 grad: 0.1110 (0.1189) loss: 0.8143 (0.8106) time: 0.1127 data: 0.0220 max mem: 9377 +Train: [66] [2300/6250] eta: 0:09:33 lr: 0.000035 grad: 0.1099 (0.1187) loss: 0.8091 (0.8105) time: 0.1281 data: 0.0336 max mem: 9377 +Train: [66] [2400/6250] eta: 0:09:18 lr: 0.000035 grad: 0.1086 (0.1185) loss: 0.8072 (0.8105) time: 0.1392 data: 0.0546 max mem: 9377 +Train: [66] [2500/6250] eta: 0:09:03 lr: 0.000035 grad: 0.1138 (0.1185) loss: 0.8089 (0.8103) time: 0.1282 data: 0.0437 max mem: 9377 +Train: [66] [2600/6250] eta: 0:08:48 lr: 0.000035 grad: 0.1023 (0.1182) loss: 0.8095 (0.8103) time: 0.1419 data: 0.0542 max mem: 9377 +Train: [66] [2700/6250] eta: 0:08:33 lr: 0.000035 grad: 0.1100 (0.1181) loss: 0.8144 (0.8104) time: 0.1340 data: 0.0459 max mem: 9377 +Train: [66] [2800/6250] eta: 0:08:20 lr: 0.000035 grad: 0.1088 (0.1179) loss: 0.8129 (0.8105) time: 0.2197 data: 0.1430 max mem: 9377 +Train: [66] [2900/6250] eta: 0:08:07 lr: 0.000035 grad: 0.1141 (0.1177) loss: 0.8081 (0.8106) time: 0.1505 data: 0.0679 max mem: 9377 +Train: [66] [3000/6250] eta: 0:07:54 lr: 0.000035 grad: 0.1128 (0.1176) loss: 0.8111 (0.8107) time: 0.1485 data: 0.0596 max mem: 9377 +Train: [66] [3100/6250] eta: 0:07:41 lr: 0.000035 grad: 0.1162 (0.1176) loss: 0.8120 (0.8107) time: 0.1425 data: 0.0493 max mem: 9377 +Train: [66] [3200/6250] eta: 0:07:28 lr: 0.000035 grad: 0.1172 (0.1175) loss: 0.8080 (0.8107) time: 0.1596 data: 0.0759 max mem: 9377 +Train: [66] [3300/6250] eta: 0:07:16 lr: 0.000035 grad: 0.1168 (0.1175) loss: 0.8104 (0.8107) time: 0.1900 data: 0.1090 max mem: 9377 +Train: [66] [3400/6250] eta: 0:07:02 lr: 0.000035 grad: 0.1146 (0.1175) loss: 0.8048 (0.8106) time: 0.1724 data: 0.0781 max mem: 9377 +Train: [66] [3500/6250] eta: 0:06:47 lr: 0.000034 grad: 0.1151 (0.1175) loss: 0.8099 (0.8105) time: 0.1611 data: 0.0751 max mem: 9377 +Train: [66] [3600/6250] eta: 0:06:32 lr: 0.000034 grad: 0.1123 (0.1175) loss: 0.8091 (0.8104) time: 0.1122 data: 0.0189 max mem: 9377 +Train: [66] [3700/6250] eta: 0:06:17 lr: 0.000034 grad: 0.1147 (0.1176) loss: 0.8093 (0.8103) time: 0.1537 data: 0.0664 max mem: 9377 +Train: [66] [3800/6250] eta: 0:06:02 lr: 0.000034 grad: 0.1112 (0.1175) loss: 0.8094 (0.8102) time: 0.1598 data: 0.0775 max mem: 9377 +Train: [66] [3900/6250] eta: 0:05:47 lr: 0.000034 grad: 0.1132 (0.1174) loss: 0.8071 (0.8101) time: 0.1426 data: 0.0553 max mem: 9377 +Train: [66] [4000/6250] eta: 0:05:32 lr: 0.000034 grad: 0.1155 (0.1173) loss: 0.8113 (0.8101) time: 0.1441 data: 0.0644 max mem: 9377 +Train: [66] [4100/6250] eta: 0:05:17 lr: 0.000034 grad: 0.1102 (0.1172) loss: 0.8096 (0.8101) time: 0.1402 data: 0.0629 max mem: 9377 +Train: [66] [4200/6250] eta: 0:05:03 lr: 0.000034 grad: 0.1136 (0.1172) loss: 0.8086 (0.8099) time: 0.1385 data: 0.0564 max mem: 9377 +Train: [66] [4300/6250] eta: 0:04:48 lr: 0.000034 grad: 0.1137 (0.1173) loss: 0.8042 (0.8099) time: 0.1506 data: 0.0680 max mem: 9377 +Train: [66] [4400/6250] eta: 0:04:33 lr: 0.000034 grad: 0.1160 (0.1173) loss: 0.8125 (0.8098) time: 0.1275 data: 0.0376 max mem: 9377 +Train: [66] [4500/6250] eta: 0:04:18 lr: 0.000034 grad: 0.1141 (0.1173) loss: 0.8136 (0.8098) time: 0.1486 data: 0.0653 max mem: 9377 +Train: [66] [4600/6250] eta: 0:04:03 lr: 0.000034 grad: 0.1057 (0.1173) loss: 0.8129 (0.8098) time: 0.1636 data: 0.0823 max mem: 9377 +Train: [66] [4700/6250] eta: 0:03:48 lr: 0.000034 grad: 0.1047 (0.1172) loss: 0.8092 (0.8098) time: 0.1548 data: 0.0724 max mem: 9377 +Train: [66] [4800/6250] eta: 0:03:34 lr: 0.000034 grad: 0.1100 (0.1172) loss: 0.8094 (0.8098) time: 0.1561 data: 0.0614 max mem: 9377 +Train: [66] [4900/6250] eta: 0:03:19 lr: 0.000034 grad: 0.1113 (0.1171) loss: 0.8088 (0.8098) time: 0.1773 data: 0.0912 max mem: 9377 +Train: [66] [5000/6250] eta: 0:03:04 lr: 0.000034 grad: 0.1133 (0.1170) loss: 0.8123 (0.8098) time: 0.1483 data: 0.0643 max mem: 9377 +Train: [66] [5100/6250] eta: 0:02:49 lr: 0.000034 grad: 0.1117 (0.1169) loss: 0.8069 (0.8098) time: 0.1575 data: 0.0798 max mem: 9377 +Train: [66] [5200/6250] eta: 0:02:35 lr: 0.000034 grad: 0.1101 (0.1169) loss: 0.8159 (0.8099) time: 0.1622 data: 0.0811 max mem: 9377 +Train: [66] [5300/6250] eta: 0:02:20 lr: 0.000034 grad: 0.1144 (0.1168) loss: 0.8104 (0.8100) time: 0.1484 data: 0.0641 max mem: 9377 +Train: [66] [5400/6250] eta: 0:02:05 lr: 0.000034 grad: 0.1135 (0.1167) loss: 0.8112 (0.8100) time: 0.1491 data: 0.0649 max mem: 9377 +Train: [66] [5500/6250] eta: 0:01:50 lr: 0.000034 grad: 0.1089 (0.1167) loss: 0.8080 (0.8100) time: 0.1290 data: 0.0380 max mem: 9377 +Train: [66] [5600/6250] eta: 0:01:35 lr: 0.000034 grad: 0.1131 (0.1167) loss: 0.8166 (0.8100) time: 0.1266 data: 0.0423 max mem: 9377 +Train: [66] [5700/6250] eta: 0:01:20 lr: 0.000034 grad: 0.1129 (0.1168) loss: 0.8040 (0.8100) time: 0.1352 data: 0.0485 max mem: 9377 +Train: [66] [5800/6250] eta: 0:01:06 lr: 0.000034 grad: 0.1124 (0.1168) loss: 0.8103 (0.8100) time: 0.1446 data: 0.0659 max mem: 9377 +Train: [66] [5900/6250] eta: 0:00:51 lr: 0.000034 grad: 0.1155 (0.1167) loss: 0.8109 (0.8100) time: 0.1516 data: 0.0729 max mem: 9377 +Train: [66] [6000/6250] eta: 0:00:36 lr: 0.000034 grad: 0.1162 (0.1168) loss: 0.8094 (0.8100) time: 0.1795 data: 0.0975 max mem: 9377 +Train: [66] [6100/6250] eta: 0:00:22 lr: 0.000034 grad: 0.1188 (0.1168) loss: 0.7996 (0.8100) time: 0.1571 data: 0.0794 max mem: 9377 +Train: [66] [6200/6250] eta: 0:00:07 lr: 0.000034 grad: 0.1119 (0.1168) loss: 0.8081 (0.8100) time: 0.1796 data: 0.1024 max mem: 9377 +Train: [66] [6249/6250] eta: 0:00:00 lr: 0.000034 grad: 0.1148 (0.1168) loss: 0.8068 (0.8099) time: 0.1427 data: 0.0618 max mem: 9377 +Train: [66] Total time: 0:15:25 (0.1481 s / it) +Averaged stats: lr: 0.000034 grad: 0.1148 (0.1168) loss: 0.8068 (0.8099) +Eval (hcp-train-subset): [66] [ 0/62] eta: 0:06:10 loss: 0.8280 (0.8280) time: 5.9814 data: 5.9505 max mem: 9377 +Eval (hcp-train-subset): [66] [61/62] eta: 0:00:00 loss: 0.8244 (0.8243) time: 0.1294 data: 0.0991 max mem: 9377 +Eval (hcp-train-subset): [66] Total time: 0:00:14 (0.2269 s / it) +Averaged stats (hcp-train-subset): loss: 0.8244 (0.8243) +Eval (hcp-val): [66] [ 0/62] eta: 0:04:49 loss: 0.8280 (0.8280) time: 4.6673 data: 4.6359 max mem: 9377 +Eval (hcp-val): [66] [61/62] eta: 0:00:00 loss: 0.8314 (0.8332) time: 0.1307 data: 0.1034 max mem: 9377 +Eval (hcp-val): [66] Total time: 0:00:13 (0.2191 s / it) +Averaged stats (hcp-val): loss: 0.8314 (0.8332) +Eval (nsd-val): [66] [ 0/62] eta: 0:04:45 loss: 0.8030 (0.8030) time: 4.6046 data: 4.5748 max mem: 9377 +Eval (nsd-val): [66] [61/62] eta: 0:00:00 loss: 0.8108 (0.8130) time: 0.1223 data: 0.0950 max mem: 9377 +Eval (nsd-val): [66] Total time: 0:00:13 (0.2158 s / it) +Averaged stats (nsd-val): loss: 0.8108 (0.8130) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +Train: [67] [ 0/6250] eta: 10:13:35 lr: 0.000034 grad: 0.1004 (0.1004) loss: 0.8360 (0.8360) time: 5.8905 data: 5.7259 max mem: 9377 +Train: [67] [ 100/6250] eta: 0:21:59 lr: 0.000034 grad: 0.1200 (0.1519) loss: 0.8055 (0.8176) time: 0.1454 data: 0.0549 max mem: 9377 +Train: [67] [ 200/6250] eta: 0:19:19 lr: 0.000034 grad: 0.1444 (0.1462) loss: 0.8107 (0.8150) time: 0.1999 data: 0.1049 max mem: 9377 +Train: [67] [ 300/6250] eta: 0:17:38 lr: 0.000034 grad: 0.1312 (0.1473) loss: 0.8167 (0.8116) time: 0.1393 data: 0.0368 max mem: 9377 +Train: [67] [ 400/6250] eta: 0:16:37 lr: 0.000034 grad: 0.1205 (0.1428) loss: 0.8115 (0.8100) time: 0.1527 data: 0.0605 max mem: 9377 +Train: [67] [ 500/6250] eta: 0:15:48 lr: 0.000034 grad: 0.1183 (0.1383) loss: 0.8094 (0.8102) time: 0.1430 data: 0.0537 max mem: 9377 +Train: [67] [ 600/6250] eta: 0:15:18 lr: 0.000033 grad: 0.1162 (0.1356) loss: 0.8041 (0.8098) time: 0.1529 data: 0.0691 max mem: 9377 +Train: [67] [ 700/6250] eta: 0:14:56 lr: 0.000033 grad: 0.1185 (0.1339) loss: 0.8086 (0.8092) time: 0.1683 data: 0.0820 max mem: 9377 +Train: [67] [ 800/6250] eta: 0:14:40 lr: 0.000033 grad: 0.1128 (0.1324) loss: 0.8006 (0.8090) time: 0.1585 data: 0.0653 max mem: 9377 +Train: [67] [ 900/6250] eta: 0:14:34 lr: 0.000033 grad: 0.1114 (0.1307) loss: 0.8163 (0.8093) time: 0.1866 data: 0.0980 max mem: 9377 +Train: [67] [1000/6250] eta: 0:14:19 lr: 0.000033 grad: 0.1132 (0.1290) loss: 0.8131 (0.8095) time: 0.1191 data: 0.0336 max mem: 9377 +Train: [67] [1100/6250] eta: 0:13:58 lr: 0.000033 grad: 0.1084 (0.1275) loss: 0.8114 (0.8098) time: 0.1471 data: 0.0618 max mem: 9377 +Train: [67] [1200/6250] eta: 0:13:38 lr: 0.000033 grad: 0.1089 (0.1262) loss: 0.8137 (0.8102) time: 0.1715 data: 0.0883 max mem: 9377 +Train: [67] [1300/6250] eta: 0:13:18 lr: 0.000033 grad: 0.1122 (0.1252) loss: 0.8117 (0.8101) time: 0.1543 data: 0.0682 max mem: 9377 +Train: [67] [1400/6250] eta: 0:12:59 lr: 0.000033 grad: 0.1132 (0.1241) loss: 0.8077 (0.8102) time: 0.1522 data: 0.0703 max mem: 9377 +Train: [67] [1500/6250] eta: 0:12:41 lr: 0.000033 grad: 0.1117 (0.1234) loss: 0.8098 (0.8103) time: 0.1394 data: 0.0559 max mem: 9377 +Train: [67] [1600/6250] eta: 0:12:21 lr: 0.000033 grad: 0.1066 (0.1227) loss: 0.8150 (0.8103) time: 0.1430 data: 0.0533 max mem: 9377 +Train: [67] [1700/6250] eta: 0:12:02 lr: 0.000033 grad: 0.1104 (0.1224) loss: 0.8051 (0.8101) time: 0.1272 data: 0.0388 max mem: 9377 +Train: [67] [1800/6250] eta: 0:11:44 lr: 0.000033 grad: 0.1161 (0.1222) loss: 0.8052 (0.8099) time: 0.1485 data: 0.0696 max mem: 9377 +Train: [67] [1900/6250] eta: 0:11:27 lr: 0.000033 grad: 0.1146 (0.1219) loss: 0.8017 (0.8096) time: 0.1497 data: 0.0649 max mem: 9377 +Train: [67] [2000/6250] eta: 0:11:10 lr: 0.000033 grad: 0.1171 (0.1217) loss: 0.8067 (0.8095) time: 0.1453 data: 0.0568 max mem: 9377 +Train: [67] [2100/6250] eta: 0:10:53 lr: 0.000033 grad: 0.1157 (0.1216) loss: 0.8085 (0.8094) time: 0.1576 data: 0.0732 max mem: 9377 +Train: [67] [2200/6250] eta: 0:10:36 lr: 0.000033 grad: 0.1164 (0.1215) loss: 0.8089 (0.8093) time: 0.1600 data: 0.0844 max mem: 9377 +Train: [67] [2300/6250] eta: 0:10:18 lr: 0.000033 grad: 0.1135 (0.1216) loss: 0.8109 (0.8092) time: 0.1519 data: 0.0730 max mem: 9377 +Train: [67] [2400/6250] eta: 0:10:01 lr: 0.000033 grad: 0.1093 (0.1214) loss: 0.8191 (0.8093) time: 0.1566 data: 0.0680 max mem: 9377 +Train: [67] [2500/6250] eta: 0:09:43 lr: 0.000033 grad: 0.1120 (0.1215) loss: 0.8097 (0.8092) time: 0.1561 data: 0.0798 max mem: 9377 +Train: [67] [2600/6250] eta: 0:09:29 lr: 0.000033 grad: 0.1132 (0.1215) loss: 0.8091 (0.8091) time: 0.1832 data: 0.0908 max mem: 9377 +Train: [67] [2700/6250] eta: 0:09:13 lr: 0.000033 grad: 0.1177 (0.1214) loss: 0.8074 (0.8091) time: 0.1723 data: 0.0830 max mem: 9377 +Train: [67] [2800/6250] eta: 0:08:57 lr: 0.000033 grad: 0.1163 (0.1214) loss: 0.8080 (0.8090) time: 0.1470 data: 0.0538 max mem: 9377 +Train: [67] [2900/6250] eta: 0:08:40 lr: 0.000033 grad: 0.1155 (0.1212) loss: 0.8067 (0.8090) time: 0.1100 data: 0.0281 max mem: 9377 +Train: [67] [3000/6250] eta: 0:08:22 lr: 0.000033 grad: 0.1214 (0.1211) loss: 0.8041 (0.8090) time: 0.1417 data: 0.0504 max mem: 9377 +Train: [67] [3100/6250] eta: 0:08:06 lr: 0.000033 grad: 0.1113 (0.1211) loss: 0.8091 (0.8090) time: 0.1355 data: 0.0522 max mem: 9377 +Train: [67] [3200/6250] eta: 0:07:49 lr: 0.000033 grad: 0.1152 (0.1210) loss: 0.8094 (0.8090) time: 0.1421 data: 0.0635 max mem: 9377 +Train: [67] [3300/6250] eta: 0:07:32 lr: 0.000033 grad: 0.1116 (0.1209) loss: 0.8058 (0.8089) time: 0.1266 data: 0.0395 max mem: 9377 +Train: [67] [3400/6250] eta: 0:07:16 lr: 0.000033 grad: 0.1168 (0.1208) loss: 0.8101 (0.8090) time: 0.1497 data: 0.0620 max mem: 9377 +Train: [67] [3500/6250] eta: 0:06:58 lr: 0.000033 grad: 0.1215 (0.1208) loss: 0.8063 (0.8090) time: 0.1391 data: 0.0550 max mem: 9377 +Train: [67] [3600/6250] eta: 0:06:42 lr: 0.000033 grad: 0.1146 (0.1206) loss: 0.8099 (0.8090) time: 0.1440 data: 0.0577 max mem: 9377 +Train: [67] [3700/6250] eta: 0:06:26 lr: 0.000033 grad: 0.1128 (0.1206) loss: 0.8021 (0.8090) time: 0.1370 data: 0.0447 max mem: 9377 +Train: [67] [3800/6250] eta: 0:06:10 lr: 0.000033 grad: 0.1128 (0.1205) loss: 0.8082 (0.8090) time: 0.1338 data: 0.0503 max mem: 9377 +Train: [67] [3900/6250] eta: 0:05:54 lr: 0.000033 grad: 0.1117 (0.1203) loss: 0.8119 (0.8090) time: 0.1556 data: 0.0719 max mem: 9377 +Train: [67] [4000/6250] eta: 0:05:38 lr: 0.000032 grad: 0.1124 (0.1202) loss: 0.8071 (0.8090) time: 0.1293 data: 0.0448 max mem: 9377 +Train: [67] [4100/6250] eta: 0:05:23 lr: 0.000032 grad: 0.1176 (0.1201) loss: 0.8042 (0.8090) time: 0.1333 data: 0.0528 max mem: 9377 +Train: [67] [4200/6250] eta: 0:05:07 lr: 0.000032 grad: 0.1111 (0.1200) loss: 0.8096 (0.8090) time: 0.1432 data: 0.0584 max mem: 9377 +Train: [67] [4300/6250] eta: 0:04:52 lr: 0.000032 grad: 0.1191 (0.1198) loss: 0.8084 (0.8091) time: 0.1373 data: 0.0546 max mem: 9377 +Train: [67] [4400/6250] eta: 0:04:36 lr: 0.000032 grad: 0.1119 (0.1198) loss: 0.8137 (0.8092) time: 0.1417 data: 0.0660 max mem: 9377 +Train: [67] [4500/6250] eta: 0:04:21 lr: 0.000032 grad: 0.1167 (0.1197) loss: 0.8094 (0.8093) time: 0.1366 data: 0.0468 max mem: 9377 +Train: [67] [4600/6250] eta: 0:04:07 lr: 0.000032 grad: 0.1151 (0.1196) loss: 0.8180 (0.8093) time: 0.1847 data: 0.0988 max mem: 9377 +Train: [67] [4700/6250] eta: 0:03:52 lr: 0.000032 grad: 0.1167 (0.1196) loss: 0.8065 (0.8093) time: 0.1507 data: 0.0721 max mem: 9377 +Train: [67] [4800/6250] eta: 0:03:37 lr: 0.000032 grad: 0.1115 (0.1195) loss: 0.8136 (0.8094) time: 0.1567 data: 0.0737 max mem: 9377 +Train: [67] [4900/6250] eta: 0:03:22 lr: 0.000032 grad: 0.1024 (0.1194) loss: 0.8175 (0.8095) time: 0.1323 data: 0.0462 max mem: 9377 +Train: [67] [5000/6250] eta: 0:03:07 lr: 0.000032 grad: 0.1074 (0.1193) loss: 0.8180 (0.8096) time: 0.1635 data: 0.0783 max mem: 9377 +Train: [67] [5100/6250] eta: 0:02:52 lr: 0.000032 grad: 0.1039 (0.1191) loss: 0.8163 (0.8097) time: 0.1450 data: 0.0594 max mem: 9377 +Train: [67] [5200/6250] eta: 0:02:37 lr: 0.000032 grad: 0.1118 (0.1190) loss: 0.8138 (0.8098) time: 0.1547 data: 0.0724 max mem: 9377 +Train: [67] [5300/6250] eta: 0:02:22 lr: 0.000032 grad: 0.1105 (0.1188) loss: 0.8172 (0.8099) time: 0.1379 data: 0.0476 max mem: 9377 +Train: [67] [5400/6250] eta: 0:02:07 lr: 0.000032 grad: 0.1136 (0.1188) loss: 0.8140 (0.8100) time: 0.1441 data: 0.0581 max mem: 9377 +Train: [67] [5500/6250] eta: 0:01:51 lr: 0.000032 grad: 0.1053 (0.1186) loss: 0.8170 (0.8101) time: 0.1233 data: 0.0434 max mem: 9377 +Train: [67] [5600/6250] eta: 0:01:36 lr: 0.000032 grad: 0.1148 (0.1185) loss: 0.8090 (0.8102) time: 0.1355 data: 0.0465 max mem: 9377 +Train: [67] [5700/6250] eta: 0:01:21 lr: 0.000032 grad: 0.1107 (0.1184) loss: 0.8160 (0.8103) time: 0.1067 data: 0.0056 max mem: 9377 +Train: [67] [5800/6250] eta: 0:01:06 lr: 0.000032 grad: 0.1140 (0.1183) loss: 0.8150 (0.8104) time: 0.1561 data: 0.0708 max mem: 9377 +Train: [67] [5900/6250] eta: 0:00:51 lr: 0.000032 grad: 0.1088 (0.1182) loss: 0.8094 (0.8105) time: 0.1248 data: 0.0453 max mem: 9377 +Train: [67] [6000/6250] eta: 0:00:37 lr: 0.000032 grad: 0.1100 (0.1181) loss: 0.8173 (0.8106) time: 0.1382 data: 0.0435 max mem: 9377 +Train: [67] [6100/6250] eta: 0:00:22 lr: 0.000032 grad: 0.1141 (0.1181) loss: 0.8140 (0.8107) time: 0.1265 data: 0.0294 max mem: 9377 +Train: [67] [6200/6250] eta: 0:00:07 lr: 0.000032 grad: 0.1097 (0.1180) loss: 0.8083 (0.8107) time: 0.1291 data: 0.0501 max mem: 9377 +Train: [67] [6249/6250] eta: 0:00:00 lr: 0.000032 grad: 0.1116 (0.1180) loss: 0.8134 (0.8108) time: 0.1406 data: 0.0592 max mem: 9377 +Train: [67] Total time: 0:15:32 (0.1492 s / it) +Averaged stats: lr: 0.000032 grad: 0.1116 (0.1180) loss: 0.8134 (0.8108) +Eval (hcp-train-subset): [67] [ 0/62] eta: 0:04:05 loss: 0.8268 (0.8268) time: 3.9627 data: 3.8549 max mem: 9377 +Eval (hcp-train-subset): [67] [61/62] eta: 0:00:00 loss: 0.8231 (0.8246) time: 0.1352 data: 0.1095 max mem: 9377 +Eval (hcp-train-subset): [67] Total time: 0:00:15 (0.2574 s / it) +Averaged stats (hcp-train-subset): loss: 0.8231 (0.8246) +Eval (hcp-val): [67] [ 0/62] eta: 0:03:48 loss: 0.8296 (0.8296) time: 3.6828 data: 3.6096 max mem: 9377 +Eval (hcp-val): [67] [61/62] eta: 0:00:00 loss: 0.8314 (0.8330) time: 0.1411 data: 0.1158 max mem: 9377 +Eval (hcp-val): [67] Total time: 0:00:14 (0.2332 s / it) +Averaged stats (hcp-val): loss: 0.8314 (0.8330) +Eval (nsd-val): [67] [ 0/62] eta: 0:05:27 loss: 0.8000 (0.8000) time: 5.2815 data: 5.2503 max mem: 9377 +Eval (nsd-val): [67] [61/62] eta: 0:00:00 loss: 0.8122 (0.8136) time: 0.1213 data: 0.0944 max mem: 9377 +Eval (nsd-val): [67] Total time: 0:00:13 (0.2213 s / it) +Averaged stats (nsd-val): loss: 0.8122 (0.8136) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +Train: [68] [ 0/6250] eta: 10:12:54 lr: 0.000032 grad: 0.1867 (0.1867) loss: 0.8282 (0.8282) time: 5.8839 data: 5.7826 max mem: 9377 +Train: [68] [ 100/6250] eta: 0:21:25 lr: 0.000032 grad: 0.1600 (0.1574) loss: 0.8166 (0.8278) time: 0.1649 data: 0.0690 max mem: 9377 +Train: [68] [ 200/6250] eta: 0:17:55 lr: 0.000032 grad: 0.1067 (0.1444) loss: 0.8289 (0.8230) time: 0.1552 data: 0.0609 max mem: 9377 +Train: [68] [ 300/6250] eta: 0:16:34 lr: 0.000032 grad: 0.1242 (0.1401) loss: 0.8064 (0.8193) time: 0.1343 data: 0.0422 max mem: 9377 +Train: [68] [ 400/6250] eta: 0:15:36 lr: 0.000032 grad: 0.1189 (0.1364) loss: 0.8072 (0.8172) time: 0.1426 data: 0.0466 max mem: 9377 +Train: [68] [ 500/6250] eta: 0:14:52 lr: 0.000032 grad: 0.1115 (0.1334) loss: 0.8151 (0.8164) time: 0.1326 data: 0.0378 max mem: 9377 +Train: [68] [ 600/6250] eta: 0:14:16 lr: 0.000032 grad: 0.1118 (0.1311) loss: 0.8198 (0.8162) time: 0.1254 data: 0.0394 max mem: 9377 +Train: [68] [ 700/6250] eta: 0:13:55 lr: 0.000032 grad: 0.1219 (0.1304) loss: 0.8086 (0.8152) time: 0.1289 data: 0.0365 max mem: 9377 +Train: [68] [ 800/6250] eta: 0:13:37 lr: 0.000032 grad: 0.1199 (0.1292) loss: 0.8097 (0.8144) time: 0.1253 data: 0.0334 max mem: 9377 +Train: [68] [ 900/6250] eta: 0:13:21 lr: 0.000032 grad: 0.1117 (0.1278) loss: 0.8051 (0.8138) time: 0.1372 data: 0.0427 max mem: 9377 +Train: [68] [1000/6250] eta: 0:13:02 lr: 0.000032 grad: 0.1171 (0.1271) loss: 0.8083 (0.8132) time: 0.1466 data: 0.0556 max mem: 9377 +Train: [68] [1100/6250] eta: 0:12:43 lr: 0.000032 grad: 0.1148 (0.1262) loss: 0.7949 (0.8128) time: 0.1318 data: 0.0444 max mem: 9377 +Train: [68] [1200/6250] eta: 0:12:29 lr: 0.000032 grad: 0.1108 (0.1254) loss: 0.8071 (0.8126) time: 0.1540 data: 0.0677 max mem: 9377 +Train: [68] [1300/6250] eta: 0:12:14 lr: 0.000031 grad: 0.1138 (0.1246) loss: 0.8081 (0.8124) time: 0.1400 data: 0.0533 max mem: 9377 +Train: [68] [1400/6250] eta: 0:11:57 lr: 0.000031 grad: 0.1123 (0.1240) loss: 0.8126 (0.8124) time: 0.1299 data: 0.0463 max mem: 9377 +Train: [68] [1500/6250] eta: 0:11:41 lr: 0.000031 grad: 0.1130 (0.1235) loss: 0.8066 (0.8124) time: 0.1401 data: 0.0526 max mem: 9377 +Train: [68] [1600/6250] eta: 0:11:24 lr: 0.000031 grad: 0.1048 (0.1230) loss: 0.8176 (0.8123) time: 0.1467 data: 0.0606 max mem: 9377 +Train: [68] [1700/6250] eta: 0:11:09 lr: 0.000031 grad: 0.1136 (0.1226) loss: 0.8134 (0.8122) time: 0.1361 data: 0.0531 max mem: 9377 +Train: [68] [1800/6250] eta: 0:10:54 lr: 0.000031 grad: 0.1164 (0.1224) loss: 0.8126 (0.8120) time: 0.1515 data: 0.0689 max mem: 9377 +Train: [68] [1900/6250] eta: 0:10:39 lr: 0.000031 grad: 0.1089 (0.1219) loss: 0.8042 (0.8120) time: 0.1460 data: 0.0546 max mem: 9377 +Train: [68] [2000/6250] eta: 0:10:28 lr: 0.000031 grad: 0.1068 (0.1215) loss: 0.8047 (0.8119) time: 0.1932 data: 0.1124 max mem: 9377 +Train: [68] [2100/6250] eta: 0:10:13 lr: 0.000031 grad: 0.1141 (0.1212) loss: 0.8115 (0.8119) time: 0.1332 data: 0.0483 max mem: 9377 +Train: [68] [2200/6250] eta: 0:09:59 lr: 0.000031 grad: 0.1111 (0.1209) loss: 0.8118 (0.8119) time: 0.1400 data: 0.0586 max mem: 9377 +Train: [68] [2300/6250] eta: 0:09:44 lr: 0.000031 grad: 0.1184 (0.1209) loss: 0.8117 (0.8118) time: 0.1531 data: 0.0687 max mem: 9377 +Train: [68] [2400/6250] eta: 0:09:33 lr: 0.000031 grad: 0.1156 (0.1209) loss: 0.8047 (0.8117) time: 0.1442 data: 0.0579 max mem: 9377 +Train: [68] [2500/6250] eta: 0:09:20 lr: 0.000031 grad: 0.1192 (0.1211) loss: 0.8105 (0.8116) time: 0.1691 data: 0.0866 max mem: 9377 +Train: [68] [2600/6250] eta: 0:09:08 lr: 0.000031 grad: 0.1151 (0.1210) loss: 0.8131 (0.8115) time: 0.1766 data: 0.0915 max mem: 9377 +Train: [68] [2700/6250] eta: 0:08:55 lr: 0.000031 grad: 0.1215 (0.1211) loss: 0.8114 (0.8114) time: 0.1725 data: 0.0851 max mem: 9377 +Train: [68] [2800/6250] eta: 0:08:41 lr: 0.000031 grad: 0.1217 (0.1212) loss: 0.8016 (0.8112) time: 0.1539 data: 0.0665 max mem: 9377 +Train: [68] [2900/6250] eta: 0:08:28 lr: 0.000031 grad: 0.1171 (0.1211) loss: 0.8090 (0.8110) time: 0.1575 data: 0.0715 max mem: 9377 +Train: [68] [3000/6250] eta: 0:08:12 lr: 0.000031 grad: 0.1155 (0.1211) loss: 0.8063 (0.8109) time: 0.1624 data: 0.0748 max mem: 9377 +Train: [68] [3100/6250] eta: 0:07:57 lr: 0.000031 grad: 0.1121 (0.1209) loss: 0.8034 (0.8109) time: 0.1330 data: 0.0447 max mem: 9377 +Train: [68] [3200/6250] eta: 0:07:41 lr: 0.000031 grad: 0.1145 (0.1207) loss: 0.8126 (0.8109) time: 0.1418 data: 0.0530 max mem: 9377 +Train: [68] [3300/6250] eta: 0:07:25 lr: 0.000031 grad: 0.1088 (0.1206) loss: 0.8160 (0.8109) time: 0.1462 data: 0.0637 max mem: 9377 +Train: [68] [3400/6250] eta: 0:07:11 lr: 0.000031 grad: 0.1113 (0.1205) loss: 0.8052 (0.8109) time: 0.1553 data: 0.0729 max mem: 9377 +Train: [68] [3500/6250] eta: 0:06:57 lr: 0.000031 grad: 0.1129 (0.1204) loss: 0.8079 (0.8107) time: 0.1720 data: 0.0896 max mem: 9377 +Train: [68] [3600/6250] eta: 0:06:44 lr: 0.000031 grad: 0.1149 (0.1203) loss: 0.8134 (0.8107) time: 0.1666 data: 0.0861 max mem: 9377 +Train: [68] [3700/6250] eta: 0:06:30 lr: 0.000031 grad: 0.1173 (0.1203) loss: 0.8076 (0.8106) time: 0.1759 data: 0.0935 max mem: 9377 +Train: [68] [3800/6250] eta: 0:06:16 lr: 0.000031 grad: 0.1230 (0.1204) loss: 0.8031 (0.8104) time: 0.1773 data: 0.0969 max mem: 9377 +Train: [68] [3900/6250] eta: 0:06:00 lr: 0.000031 grad: 0.1176 (0.1205) loss: 0.8060 (0.8103) time: 0.1422 data: 0.0577 max mem: 9377 +Train: [68] [4000/6250] eta: 0:05:44 lr: 0.000031 grad: 0.1202 (0.1205) loss: 0.8016 (0.8101) time: 0.1464 data: 0.0655 max mem: 9377 +Train: [68] [4100/6250] eta: 0:05:28 lr: 0.000031 grad: 0.1159 (0.1205) loss: 0.8089 (0.8100) time: 0.1422 data: 0.0595 max mem: 9377 +Train: [68] [4200/6250] eta: 0:05:13 lr: 0.000031 grad: 0.1199 (0.1205) loss: 0.8174 (0.8100) time: 0.1431 data: 0.0591 max mem: 9377 +Train: [68] [4300/6250] eta: 0:04:59 lr: 0.000031 grad: 0.1188 (0.1204) loss: 0.8147 (0.8100) time: 0.1930 data: 0.1116 max mem: 9377 +Train: [68] [4400/6250] eta: 0:04:45 lr: 0.000031 grad: 0.1158 (0.1204) loss: 0.8079 (0.8099) time: 0.2370 data: 0.1599 max mem: 9377 +Train: [68] [4500/6250] eta: 0:04:29 lr: 0.000031 grad: 0.1158 (0.1204) loss: 0.8070 (0.8098) time: 0.1677 data: 0.0852 max mem: 9377 +Train: [68] [4600/6250] eta: 0:04:14 lr: 0.000031 grad: 0.1183 (0.1204) loss: 0.8031 (0.8098) time: 0.1657 data: 0.0821 max mem: 9377 +Train: [68] [4700/6250] eta: 0:03:59 lr: 0.000031 grad: 0.1154 (0.1203) loss: 0.8077 (0.8097) time: 0.1496 data: 0.0686 max mem: 9377 +Train: [68] [4800/6250] eta: 0:03:44 lr: 0.000030 grad: 0.1194 (0.1203) loss: 0.8018 (0.8096) time: 0.1461 data: 0.0635 max mem: 9377 +Train: [68] [4900/6250] eta: 0:03:28 lr: 0.000030 grad: 0.1148 (0.1203) loss: 0.8084 (0.8096) time: 0.1546 data: 0.0708 max mem: 9377 +Train: [68] [5000/6250] eta: 0:03:13 lr: 0.000030 grad: 0.1089 (0.1202) loss: 0.8076 (0.8095) time: 0.1442 data: 0.0593 max mem: 9377 +Train: [68] [5100/6250] eta: 0:02:57 lr: 0.000030 grad: 0.1155 (0.1201) loss: 0.8083 (0.8095) time: 0.1431 data: 0.0557 max mem: 9377 +Train: [68] [5200/6250] eta: 0:02:41 lr: 0.000030 grad: 0.1170 (0.1201) loss: 0.8129 (0.8095) time: 0.1570 data: 0.0632 max mem: 9377 +Train: [68] [5300/6250] eta: 0:02:26 lr: 0.000030 grad: 0.1150 (0.1200) loss: 0.8101 (0.8096) time: 0.1572 data: 0.0754 max mem: 9377 +Train: [68] [5400/6250] eta: 0:02:10 lr: 0.000030 grad: 0.1152 (0.1200) loss: 0.8085 (0.8096) time: 0.1422 data: 0.0453 max mem: 9377 +Train: [68] [5500/6250] eta: 0:01:55 lr: 0.000030 grad: 0.1080 (0.1199) loss: 0.8160 (0.8097) time: 0.1424 data: 0.0656 max mem: 9377 +Train: [68] [5600/6250] eta: 0:01:39 lr: 0.000030 grad: 0.1126 (0.1198) loss: 0.8116 (0.8097) time: 0.1301 data: 0.0450 max mem: 9377 +Train: [68] [5700/6250] eta: 0:01:24 lr: 0.000030 grad: 0.1148 (0.1198) loss: 0.8170 (0.8098) time: 0.1580 data: 0.0825 max mem: 9377 +Train: [68] [5800/6250] eta: 0:01:09 lr: 0.000030 grad: 0.1141 (0.1197) loss: 0.8149 (0.8099) time: 0.1492 data: 0.0720 max mem: 9377 +Train: [68] [5900/6250] eta: 0:00:53 lr: 0.000030 grad: 0.1181 (0.1197) loss: 0.8166 (0.8099) time: 0.1318 data: 0.0470 max mem: 9377 +Train: [68] [6000/6250] eta: 0:00:38 lr: 0.000030 grad: 0.1193 (0.1196) loss: 0.8094 (0.8100) time: 0.1280 data: 0.0481 max mem: 9377 +Train: [68] [6100/6250] eta: 0:00:23 lr: 0.000030 grad: 0.1126 (0.1196) loss: 0.8118 (0.8100) time: 0.1506 data: 0.0701 max mem: 9377 +Train: [68] [6200/6250] eta: 0:00:07 lr: 0.000030 grad: 0.1164 (0.1196) loss: 0.8119 (0.8100) time: 0.1672 data: 0.0856 max mem: 9377 +Train: [68] [6249/6250] eta: 0:00:00 lr: 0.000030 grad: 0.1156 (0.1196) loss: 0.8034 (0.8100) time: 0.1441 data: 0.0548 max mem: 9377 +Train: [68] Total time: 0:16:04 (0.1543 s / it) +Averaged stats: lr: 0.000030 grad: 0.1156 (0.1196) loss: 0.8034 (0.8100) +Eval (hcp-train-subset): [68] [ 0/62] eta: 0:04:46 loss: 0.8257 (0.8257) time: 4.6222 data: 4.5226 max mem: 9377 +Eval (hcp-train-subset): [68] [61/62] eta: 0:00:00 loss: 0.8207 (0.8225) time: 0.1351 data: 0.1082 max mem: 9377 +Eval (hcp-train-subset): [68] Total time: 0:00:14 (0.2349 s / it) +Averaged stats (hcp-train-subset): loss: 0.8207 (0.8225) +Eval (hcp-val): [68] [ 0/62] eta: 0:04:07 loss: 0.8296 (0.8296) time: 3.9855 data: 3.9125 max mem: 9377 +Eval (hcp-val): [68] [61/62] eta: 0:00:00 loss: 0.8309 (0.8324) time: 0.1264 data: 0.1012 max mem: 9377 +Eval (hcp-val): [68] Total time: 0:00:13 (0.2163 s / it) +Averaged stats (hcp-val): loss: 0.8309 (0.8324) +Eval (nsd-val): [68] [ 0/62] eta: 0:05:53 loss: 0.8070 (0.8070) time: 5.7074 data: 5.6710 max mem: 9377 +Eval (nsd-val): [68] [61/62] eta: 0:00:00 loss: 0.8126 (0.8148) time: 0.1328 data: 0.1070 max mem: 9377 +Eval (nsd-val): [68] Total time: 0:00:13 (0.2179 s / it) +Averaged stats (nsd-val): loss: 0.8126 (0.8148) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +Train: [69] [ 0/6250] eta: 10:51:05 lr: 0.000030 grad: 0.1803 (0.1803) loss: 0.7572 (0.7572) time: 6.2505 data: 6.1372 max mem: 9377 +Train: [69] [ 100/6250] eta: 0:20:00 lr: 0.000030 grad: 0.1087 (0.1486) loss: 0.8167 (0.8271) time: 0.1486 data: 0.0434 max mem: 9377 +Train: [69] [ 200/6250] eta: 0:17:15 lr: 0.000030 grad: 0.1141 (0.1344) loss: 0.8175 (0.8261) time: 0.1617 data: 0.0592 max mem: 9377 +Train: [69] [ 300/6250] eta: 0:15:58 lr: 0.000030 grad: 0.1191 (0.1297) loss: 0.8207 (0.8248) time: 0.1527 data: 0.0578 max mem: 9377 +Train: [69] [ 400/6250] eta: 0:15:08 lr: 0.000030 grad: 0.1127 (0.1267) loss: 0.8107 (0.8230) time: 0.1312 data: 0.0398 max mem: 9377 +Train: [69] [ 500/6250] eta: 0:14:34 lr: 0.000030 grad: 0.1050 (0.1233) loss: 0.8131 (0.8219) time: 0.1455 data: 0.0500 max mem: 9377 +Train: [69] [ 600/6250] eta: 0:14:05 lr: 0.000030 grad: 0.1127 (0.1217) loss: 0.8201 (0.8211) time: 0.1539 data: 0.0559 max mem: 9377 +Train: [69] [ 700/6250] eta: 0:13:39 lr: 0.000030 grad: 0.1161 (0.1207) loss: 0.8048 (0.8198) time: 0.1281 data: 0.0372 max mem: 9377 +Train: [69] [ 800/6250] eta: 0:13:23 lr: 0.000030 grad: 0.1059 (0.1206) loss: 0.8234 (0.8190) time: 0.1484 data: 0.0539 max mem: 9377 +Train: [69] [ 900/6250] eta: 0:13:13 lr: 0.000030 grad: 0.1118 (0.1199) loss: 0.8193 (0.8184) time: 0.1604 data: 0.0703 max mem: 9377 +Train: [69] [1000/6250] eta: 0:12:54 lr: 0.000030 grad: 0.1062 (0.1189) loss: 0.8109 (0.8184) time: 0.1116 data: 0.0159 max mem: 9377 +Train: [69] [1100/6250] eta: 0:12:34 lr: 0.000030 grad: 0.1071 (0.1185) loss: 0.8159 (0.8182) time: 0.1218 data: 0.0404 max mem: 9377 +Train: [69] [1200/6250] eta: 0:12:19 lr: 0.000030 grad: 0.1112 (0.1179) loss: 0.8134 (0.8180) time: 0.1605 data: 0.0725 max mem: 9377 +Train: [69] [1300/6250] eta: 0:12:03 lr: 0.000030 grad: 0.1104 (0.1173) loss: 0.8103 (0.8178) time: 0.1380 data: 0.0580 max mem: 9377 +Train: [69] [1400/6250] eta: 0:11:49 lr: 0.000030 grad: 0.1041 (0.1170) loss: 0.8114 (0.8175) time: 0.1457 data: 0.0593 max mem: 9377 +Train: [69] [1500/6250] eta: 0:11:35 lr: 0.000030 grad: 0.1101 (0.1168) loss: 0.8175 (0.8174) time: 0.1514 data: 0.0627 max mem: 9377 +Train: [69] [1600/6250] eta: 0:11:22 lr: 0.000030 grad: 0.1132 (0.1167) loss: 0.8136 (0.8170) time: 0.1588 data: 0.0732 max mem: 9377 +Train: [69] [1700/6250] eta: 0:11:09 lr: 0.000030 grad: 0.1121 (0.1165) loss: 0.8074 (0.8168) time: 0.1647 data: 0.0792 max mem: 9377 +Train: [69] [1800/6250] eta: 0:10:53 lr: 0.000030 grad: 0.1108 (0.1165) loss: 0.8100 (0.8165) time: 0.1477 data: 0.0597 max mem: 9377 +Train: [69] [1900/6250] eta: 0:10:37 lr: 0.000030 grad: 0.1156 (0.1165) loss: 0.8062 (0.8162) time: 0.1414 data: 0.0592 max mem: 9377 +Train: [69] [2000/6250] eta: 0:10:21 lr: 0.000030 grad: 0.1087 (0.1166) loss: 0.8121 (0.8159) time: 0.1189 data: 0.0238 max mem: 9377 +Train: [69] [2100/6250] eta: 0:10:06 lr: 0.000029 grad: 0.1175 (0.1167) loss: 0.8103 (0.8156) time: 0.1372 data: 0.0480 max mem: 9377 +Train: [69] [2200/6250] eta: 0:09:58 lr: 0.000029 grad: 0.1181 (0.1170) loss: 0.8066 (0.8152) time: 0.1741 data: 0.0852 max mem: 9377 +Train: [69] [2300/6250] eta: 0:09:45 lr: 0.000029 grad: 0.1201 (0.1171) loss: 0.8038 (0.8149) time: 0.1691 data: 0.0872 max mem: 9377 +Train: [69] [2400/6250] eta: 0:09:30 lr: 0.000029 grad: 0.1163 (0.1172) loss: 0.8212 (0.8147) time: 0.1019 data: 0.0264 max mem: 9377 +Train: [69] [2500/6250] eta: 0:09:16 lr: 0.000029 grad: 0.1124 (0.1173) loss: 0.8134 (0.8144) time: 0.1398 data: 0.0563 max mem: 9377 +Train: [69] [2600/6250] eta: 0:09:00 lr: 0.000029 grad: 0.1208 (0.1175) loss: 0.8088 (0.8141) time: 0.1471 data: 0.0597 max mem: 9377 +Train: [69] [2700/6250] eta: 0:08:45 lr: 0.000029 grad: 0.1195 (0.1177) loss: 0.8037 (0.8138) time: 0.1414 data: 0.0567 max mem: 9377 +Train: [69] [2800/6250] eta: 0:08:29 lr: 0.000029 grad: 0.1212 (0.1180) loss: 0.8076 (0.8134) time: 0.1262 data: 0.0421 max mem: 9377 +Train: [69] [2900/6250] eta: 0:08:13 lr: 0.000029 grad: 0.1132 (0.1181) loss: 0.8084 (0.8132) time: 0.1450 data: 0.0570 max mem: 9377 +Train: [69] [3000/6250] eta: 0:07:57 lr: 0.000029 grad: 0.1139 (0.1183) loss: 0.8082 (0.8130) time: 0.1406 data: 0.0447 max mem: 9377 +Train: [69] [3100/6250] eta: 0:07:41 lr: 0.000029 grad: 0.1246 (0.1184) loss: 0.8036 (0.8127) time: 0.1467 data: 0.0617 max mem: 9377 +Train: [69] [3200/6250] eta: 0:07:25 lr: 0.000029 grad: 0.1233 (0.1185) loss: 0.8070 (0.8125) time: 0.1386 data: 0.0499 max mem: 9377 +Train: [69] [3300/6250] eta: 0:07:10 lr: 0.000029 grad: 0.1241 (0.1186) loss: 0.8011 (0.8123) time: 0.1442 data: 0.0572 max mem: 9377 +Train: [69] [3400/6250] eta: 0:06:55 lr: 0.000029 grad: 0.1231 (0.1188) loss: 0.8108 (0.8121) time: 0.1460 data: 0.0631 max mem: 9377 +Train: [69] [3500/6250] eta: 0:06:40 lr: 0.000029 grad: 0.1288 (0.1190) loss: 0.8001 (0.8118) time: 0.1504 data: 0.0631 max mem: 9377 +Train: [69] [3600/6250] eta: 0:06:25 lr: 0.000029 grad: 0.1206 (0.1191) loss: 0.8001 (0.8115) time: 0.1366 data: 0.0538 max mem: 9377 +Train: [69] [3700/6250] eta: 0:06:10 lr: 0.000029 grad: 0.1195 (0.1192) loss: 0.8082 (0.8113) time: 0.1447 data: 0.0576 max mem: 9377 +Train: [69] [3800/6250] eta: 0:05:55 lr: 0.000029 grad: 0.1181 (0.1193) loss: 0.8109 (0.8112) time: 0.1490 data: 0.0654 max mem: 9377 +Train: [69] [3900/6250] eta: 0:05:40 lr: 0.000029 grad: 0.1182 (0.1194) loss: 0.8074 (0.8110) time: 0.1295 data: 0.0449 max mem: 9377 +Train: [69] [4000/6250] eta: 0:05:25 lr: 0.000029 grad: 0.1173 (0.1195) loss: 0.8058 (0.8109) time: 0.1342 data: 0.0586 max mem: 9377 +Train: [69] [4100/6250] eta: 0:05:12 lr: 0.000029 grad: 0.1153 (0.1195) loss: 0.8091 (0.8109) time: 0.1615 data: 0.0770 max mem: 9377 +Train: [69] [4200/6250] eta: 0:04:58 lr: 0.000029 grad: 0.1153 (0.1196) loss: 0.8079 (0.8108) time: 0.1508 data: 0.0705 max mem: 9377 +Train: [69] [4300/6250] eta: 0:04:44 lr: 0.000029 grad: 0.1139 (0.1196) loss: 0.8131 (0.8107) time: 0.1479 data: 0.0675 max mem: 9377 +Train: [69] [4400/6250] eta: 0:04:29 lr: 0.000029 grad: 0.1202 (0.1196) loss: 0.8048 (0.8107) time: 0.1454 data: 0.0679 max mem: 9377 +Train: [69] [4500/6250] eta: 0:04:15 lr: 0.000029 grad: 0.1190 (0.1197) loss: 0.8020 (0.8107) time: 0.1490 data: 0.0608 max mem: 9377 +Train: [69] [4600/6250] eta: 0:04:00 lr: 0.000029 grad: 0.1177 (0.1197) loss: 0.8045 (0.8106) time: 0.1877 data: 0.1038 max mem: 9377 +Train: [69] [4700/6250] eta: 0:03:45 lr: 0.000029 grad: 0.1141 (0.1198) loss: 0.8109 (0.8105) time: 0.1450 data: 0.0567 max mem: 9377 +Train: [69] [4800/6250] eta: 0:03:31 lr: 0.000029 grad: 0.1258 (0.1199) loss: 0.8086 (0.8105) time: 0.1494 data: 0.0687 max mem: 9377 +Train: [69] [4900/6250] eta: 0:03:16 lr: 0.000029 grad: 0.1195 (0.1199) loss: 0.8083 (0.8104) time: 0.1391 data: 0.0591 max mem: 9377 +Train: [69] [5000/6250] eta: 0:03:01 lr: 0.000029 grad: 0.1234 (0.1200) loss: 0.8042 (0.8103) time: 0.1493 data: 0.0606 max mem: 9377 +Train: [69] [5100/6250] eta: 0:02:47 lr: 0.000029 grad: 0.1253 (0.1201) loss: 0.8031 (0.8102) time: 0.1788 data: 0.0921 max mem: 9377 +Train: [69] [5200/6250] eta: 0:02:32 lr: 0.000029 grad: 0.1208 (0.1202) loss: 0.8014 (0.8101) time: 0.1356 data: 0.0510 max mem: 9377 +Train: [69] [5300/6250] eta: 0:02:17 lr: 0.000029 grad: 0.1136 (0.1202) loss: 0.8081 (0.8100) time: 0.1256 data: 0.0436 max mem: 9377 +Train: [69] [5400/6250] eta: 0:02:03 lr: 0.000029 grad: 0.1146 (0.1203) loss: 0.8018 (0.8099) time: 0.1362 data: 0.0540 max mem: 9377 +Train: [69] [5500/6250] eta: 0:01:48 lr: 0.000029 grad: 0.1168 (0.1202) loss: 0.8042 (0.8099) time: 0.1159 data: 0.0228 max mem: 9377 +Train: [69] [5600/6250] eta: 0:01:34 lr: 0.000028 grad: 0.1188 (0.1203) loss: 0.8087 (0.8099) time: 0.1252 data: 0.0421 max mem: 9377 +Train: [69] [5700/6250] eta: 0:01:19 lr: 0.000028 grad: 0.1186 (0.1203) loss: 0.8131 (0.8099) time: 0.1124 data: 0.0314 max mem: 9377 +Train: [69] [5800/6250] eta: 0:01:05 lr: 0.000028 grad: 0.1102 (0.1203) loss: 0.8126 (0.8099) time: 0.1487 data: 0.0687 max mem: 9377 +Train: [69] [5900/6250] eta: 0:00:50 lr: 0.000028 grad: 0.1227 (0.1203) loss: 0.8124 (0.8099) time: 0.1317 data: 0.0405 max mem: 9377 +Train: [69] [6000/6250] eta: 0:00:36 lr: 0.000028 grad: 0.1143 (0.1203) loss: 0.8055 (0.8098) time: 0.1398 data: 0.0536 max mem: 9377 +Train: [69] [6100/6250] eta: 0:00:21 lr: 0.000028 grad: 0.1193 (0.1203) loss: 0.8042 (0.8098) time: 0.1293 data: 0.0411 max mem: 9377 +Train: [69] [6200/6250] eta: 0:00:07 lr: 0.000028 grad: 0.1172 (0.1203) loss: 0.8068 (0.8098) time: 0.1315 data: 0.0368 max mem: 9377 +Train: [69] [6249/6250] eta: 0:00:00 lr: 0.000028 grad: 0.1185 (0.1204) loss: 0.8058 (0.8097) time: 0.1413 data: 0.0609 max mem: 9377 +Train: [69] Total time: 0:15:09 (0.1454 s / it) +Averaged stats: lr: 0.000028 grad: 0.1185 (0.1204) loss: 0.8058 (0.8097) +Eval (hcp-train-subset): [69] [ 0/62] eta: 0:05:06 loss: 0.8225 (0.8225) time: 4.9423 data: 4.8893 max mem: 9377 +Eval (hcp-train-subset): [69] [61/62] eta: 0:00:00 loss: 0.8222 (0.8215) time: 0.1442 data: 0.1170 max mem: 9377 +Eval (hcp-train-subset): [69] Total time: 0:00:14 (0.2408 s / it) +Averaged stats (hcp-train-subset): loss: 0.8222 (0.8215) +Making plots (hcp-train-subset): example=48 +Eval (hcp-val): [69] [ 0/62] eta: 0:03:32 loss: 0.8254 (0.8254) time: 3.4338 data: 3.3698 max mem: 9377 +Eval (hcp-val): [69] [61/62] eta: 0:00:00 loss: 0.8308 (0.8322) time: 0.1210 data: 0.0939 max mem: 9377 +Eval (hcp-val): [69] Total time: 0:00:13 (0.2170 s / it) +Averaged stats (hcp-val): loss: 0.8308 (0.8322) +Making plots (hcp-val): example=51 +Eval (nsd-val): [69] [ 0/62] eta: 0:05:27 loss: 0.8023 (0.8023) time: 5.2822 data: 5.2176 max mem: 9377 +Eval (nsd-val): [69] [61/62] eta: 0:00:00 loss: 0.8079 (0.8109) time: 0.1147 data: 0.0897 max mem: 9377 +Eval (nsd-val): [69] Total time: 0:00:13 (0.2194 s / it) +Averaged stats (nsd-val): loss: 0.8079 (0.8109) +Making plots (nsd-val): example=1 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-00069.pth +Train: [70] [ 0/6250] eta: 10:37:12 lr: 0.000028 grad: 0.1243 (0.1243) loss: 0.8579 (0.8579) time: 6.1172 data: 6.0205 max mem: 9377 +Train: [70] [ 100/6250] eta: 0:20:15 lr: 0.000028 grad: 0.1187 (0.1506) loss: 0.8217 (0.8257) time: 0.1441 data: 0.0549 max mem: 9377 +Train: [70] [ 200/6250] eta: 0:17:31 lr: 0.000028 grad: 0.1088 (0.1453) loss: 0.8189 (0.8211) time: 0.1497 data: 0.0572 max mem: 9377 +Train: [70] [ 300/6250] eta: 0:16:23 lr: 0.000028 grad: 0.1070 (0.1392) loss: 0.8138 (0.8175) time: 0.1379 data: 0.0489 max mem: 9377 +Train: [70] [ 400/6250] eta: 0:15:28 lr: 0.000028 grad: 0.1224 (0.1362) loss: 0.7981 (0.8144) time: 0.1418 data: 0.0530 max mem: 9377 +Train: [70] [ 500/6250] eta: 0:14:48 lr: 0.000028 grad: 0.1173 (0.1333) loss: 0.8033 (0.8124) time: 0.1341 data: 0.0436 max mem: 9377 +Train: [70] [ 600/6250] eta: 0:14:16 lr: 0.000028 grad: 0.1150 (0.1322) loss: 0.8063 (0.8106) time: 0.1349 data: 0.0447 max mem: 9377 +Train: [70] [ 700/6250] eta: 0:13:51 lr: 0.000028 grad: 0.1089 (0.1307) loss: 0.8077 (0.8096) time: 0.1356 data: 0.0432 max mem: 9377 +Train: [70] [ 800/6250] eta: 0:13:30 lr: 0.000028 grad: 0.1151 (0.1297) loss: 0.8043 (0.8087) time: 0.1456 data: 0.0416 max mem: 9377 +Train: [70] [ 900/6250] eta: 0:13:22 lr: 0.000028 grad: 0.1206 (0.1291) loss: 0.8094 (0.8082) time: 0.1645 data: 0.0739 max mem: 9377 +Train: [70] [1000/6250] eta: 0:13:10 lr: 0.000028 grad: 0.1151 (0.1281) loss: 0.8113 (0.8081) time: 0.1727 data: 0.0827 max mem: 9377 +Train: [70] [1100/6250] eta: 0:13:02 lr: 0.000028 grad: 0.1192 (0.1274) loss: 0.8020 (0.8077) time: 0.1765 data: 0.0889 max mem: 9377 +Train: [70] [1200/6250] eta: 0:12:53 lr: 0.000028 grad: 0.1194 (0.1269) loss: 0.8016 (0.8074) time: 0.1695 data: 0.0894 max mem: 9377 +Train: [70] [1300/6250] eta: 0:12:38 lr: 0.000028 grad: 0.1210 (0.1261) loss: 0.8060 (0.8073) time: 0.1395 data: 0.0539 max mem: 9377 +Train: [70] [1400/6250] eta: 0:12:28 lr: 0.000028 grad: 0.1165 (0.1258) loss: 0.8074 (0.8072) time: 0.1624 data: 0.0793 max mem: 9377 +Train: [70] [1500/6250] eta: 0:12:17 lr: 0.000028 grad: 0.1139 (0.1255) loss: 0.8100 (0.8071) time: 0.1699 data: 0.0829 max mem: 9377 +Train: [70] [1600/6250] eta: 0:12:04 lr: 0.000028 grad: 0.1183 (0.1252) loss: 0.8047 (0.8070) time: 0.1710 data: 0.0931 max mem: 9377 +Train: [70] [1700/6250] eta: 0:11:51 lr: 0.000028 grad: 0.1165 (0.1249) loss: 0.8110 (0.8069) time: 0.1890 data: 0.1074 max mem: 9377 +Train: [70] [1800/6250] eta: 0:11:36 lr: 0.000028 grad: 0.1233 (0.1248) loss: 0.8115 (0.8070) time: 0.1593 data: 0.0764 max mem: 9377 +Train: [70] [1900/6250] eta: 0:11:21 lr: 0.000028 grad: 0.1218 (0.1245) loss: 0.8090 (0.8072) time: 0.1661 data: 0.0793 max mem: 9377 +Train: [70] [2000/6250] eta: 0:11:07 lr: 0.000028 grad: 0.1192 (0.1244) loss: 0.8028 (0.8072) time: 0.1452 data: 0.0655 max mem: 9377 +Train: [70] [2100/6250] eta: 0:10:53 lr: 0.000028 grad: 0.1128 (0.1242) loss: 0.8124 (0.8073) time: 0.1583 data: 0.0709 max mem: 9377 +Train: [70] [2200/6250] eta: 0:10:39 lr: 0.000028 grad: 0.1201 (0.1241) loss: 0.8089 (0.8073) time: 0.1763 data: 0.0901 max mem: 9377 +Train: [70] [2300/6250] eta: 0:10:22 lr: 0.000028 grad: 0.1212 (0.1240) loss: 0.8067 (0.8073) time: 0.1285 data: 0.0448 max mem: 9377 +Train: [70] [2400/6250] eta: 0:10:03 lr: 0.000028 grad: 0.1243 (0.1240) loss: 0.8074 (0.8073) time: 0.1534 data: 0.0682 max mem: 9377 +Train: [70] [2500/6250] eta: 0:09:44 lr: 0.000028 grad: 0.1196 (0.1242) loss: 0.8104 (0.8072) time: 0.1341 data: 0.0385 max mem: 9377 +Train: [70] [2600/6250] eta: 0:09:27 lr: 0.000028 grad: 0.1242 (0.1244) loss: 0.8066 (0.8071) time: 0.1438 data: 0.0647 max mem: 9377 +Train: [70] [2700/6250] eta: 0:09:11 lr: 0.000028 grad: 0.1284 (0.1244) loss: 0.8103 (0.8071) time: 0.1531 data: 0.0732 max mem: 9377 +Train: [70] [2800/6250] eta: 0:08:57 lr: 0.000028 grad: 0.1177 (0.1246) loss: 0.8066 (0.8071) time: 0.1614 data: 0.0825 max mem: 9377 +Train: [70] [2900/6250] eta: 0:08:42 lr: 0.000028 grad: 0.1218 (0.1245) loss: 0.8080 (0.8071) time: 0.1720 data: 0.0944 max mem: 9377 +Train: [70] [3000/6250] eta: 0:08:28 lr: 0.000027 grad: 0.1201 (0.1245) loss: 0.8075 (0.8071) time: 0.1368 data: 0.0533 max mem: 9377 +Train: [70] [3100/6250] eta: 0:08:11 lr: 0.000027 grad: 0.1142 (0.1243) loss: 0.8111 (0.8072) time: 0.1231 data: 0.0367 max mem: 9377 +Train: [70] [3200/6250] eta: 0:07:54 lr: 0.000027 grad: 0.1252 (0.1242) loss: 0.8088 (0.8072) time: 0.1496 data: 0.0626 max mem: 9377 +Train: [70] [3300/6250] eta: 0:07:37 lr: 0.000027 grad: 0.1151 (0.1243) loss: 0.8105 (0.8071) time: 0.1075 data: 0.0178 max mem: 9377 +Train: [70] [3400/6250] eta: 0:07:21 lr: 0.000027 grad: 0.1230 (0.1242) loss: 0.8084 (0.8072) time: 0.1403 data: 0.0565 max mem: 9377 +Train: [70] [3500/6250] eta: 0:07:05 lr: 0.000027 grad: 0.1162 (0.1241) loss: 0.8096 (0.8072) time: 0.1516 data: 0.0639 max mem: 9377 +Train: [70] [3600/6250] eta: 0:06:49 lr: 0.000027 grad: 0.1187 (0.1241) loss: 0.8102 (0.8073) time: 0.1455 data: 0.0585 max mem: 9377 +Train: [70] [3700/6250] eta: 0:06:35 lr: 0.000027 grad: 0.1423 (0.1242) loss: 0.8031 (0.8073) time: 0.1822 data: 0.1005 max mem: 9377 +Train: [70] [3800/6250] eta: 0:06:20 lr: 0.000027 grad: 0.1202 (0.1242) loss: 0.8025 (0.8072) time: 0.1551 data: 0.0770 max mem: 9377 +Train: [70] [3900/6250] eta: 0:06:05 lr: 0.000027 grad: 0.1161 (0.1242) loss: 0.8171 (0.8072) time: 0.1386 data: 0.0503 max mem: 9377 +Train: [70] [4000/6250] eta: 0:05:50 lr: 0.000027 grad: 0.1270 (0.1242) loss: 0.8076 (0.8072) time: 0.1452 data: 0.0589 max mem: 9377 +Train: [70] [4100/6250] eta: 0:05:35 lr: 0.000027 grad: 0.1298 (0.1243) loss: 0.8037 (0.8071) time: 0.1641 data: 0.0760 max mem: 9377 +Train: [70] [4200/6250] eta: 0:05:19 lr: 0.000027 grad: 0.1169 (0.1244) loss: 0.8025 (0.8071) time: 0.1138 data: 0.0253 max mem: 9377 +Train: [70] [4300/6250] eta: 0:05:02 lr: 0.000027 grad: 0.1242 (0.1244) loss: 0.8007 (0.8070) time: 0.1270 data: 0.0447 max mem: 9377 +Train: [70] [4400/6250] eta: 0:04:46 lr: 0.000027 grad: 0.1201 (0.1244) loss: 0.8068 (0.8069) time: 0.1404 data: 0.0564 max mem: 9377 +Train: [70] [4500/6250] eta: 0:04:30 lr: 0.000027 grad: 0.1140 (0.1244) loss: 0.8073 (0.8069) time: 0.1251 data: 0.0379 max mem: 9377 +Train: [70] [4600/6250] eta: 0:04:14 lr: 0.000027 grad: 0.1197 (0.1243) loss: 0.8093 (0.8069) time: 0.1563 data: 0.0726 max mem: 9377 +Train: [70] [4700/6250] eta: 0:03:58 lr: 0.000027 grad: 0.1268 (0.1243) loss: 0.8015 (0.8068) time: 0.1490 data: 0.0607 max mem: 9377 +Train: [70] [4800/6250] eta: 0:03:42 lr: 0.000027 grad: 0.1216 (0.1243) loss: 0.8059 (0.8068) time: 0.1397 data: 0.0560 max mem: 9377 +Train: [70] [4900/6250] eta: 0:03:26 lr: 0.000027 grad: 0.1266 (0.1243) loss: 0.7943 (0.8067) time: 0.1451 data: 0.0631 max mem: 9377 +Train: [70] [5000/6250] eta: 0:03:11 lr: 0.000027 grad: 0.1156 (0.1243) loss: 0.8110 (0.8067) time: 0.1560 data: 0.0725 max mem: 9377 +Train: [70] [5100/6250] eta: 0:02:55 lr: 0.000027 grad: 0.1214 (0.1243) loss: 0.7996 (0.8066) time: 0.1440 data: 0.0649 max mem: 9377 +Train: [70] [5200/6250] eta: 0:02:40 lr: 0.000027 grad: 0.1215 (0.1243) loss: 0.8028 (0.8065) time: 0.1521 data: 0.0669 max mem: 9377 +Train: [70] [5300/6250] eta: 0:02:25 lr: 0.000027 grad: 0.1194 (0.1243) loss: 0.8053 (0.8066) time: 0.1537 data: 0.0731 max mem: 9377 +Train: [70] [5400/6250] eta: 0:02:09 lr: 0.000027 grad: 0.1159 (0.1242) loss: 0.8122 (0.8066) time: 0.1490 data: 0.0649 max mem: 9377 +Train: [70] [5500/6250] eta: 0:01:54 lr: 0.000027 grad: 0.1203 (0.1242) loss: 0.8072 (0.8066) time: 0.1432 data: 0.0573 max mem: 9377 +Train: [70] [5600/6250] eta: 0:01:39 lr: 0.000027 grad: 0.1195 (0.1242) loss: 0.8036 (0.8066) time: 0.1654 data: 0.0875 max mem: 9377 +Train: [70] [5700/6250] eta: 0:01:23 lr: 0.000027 grad: 0.1179 (0.1242) loss: 0.8080 (0.8066) time: 0.1683 data: 0.0856 max mem: 9377 +Train: [70] [5800/6250] eta: 0:01:08 lr: 0.000027 grad: 0.1329 (0.1242) loss: 0.8001 (0.8066) time: 0.1644 data: 0.0807 max mem: 9377 +Train: [70] [5900/6250] eta: 0:00:53 lr: 0.000027 grad: 0.1211 (0.1242) loss: 0.7973 (0.8065) time: 0.1548 data: 0.0735 max mem: 9377 +Train: [70] [6000/6250] eta: 0:00:38 lr: 0.000027 grad: 0.1223 (0.1243) loss: 0.8035 (0.8064) time: 0.1646 data: 0.0783 max mem: 9377 +Train: [70] [6100/6250] eta: 0:00:22 lr: 0.000027 grad: 0.1240 (0.1243) loss: 0.7979 (0.8064) time: 0.1473 data: 0.0701 max mem: 9377 +Train: [70] [6200/6250] eta: 0:00:07 lr: 0.000027 grad: 0.1338 (0.1244) loss: 0.7981 (0.8063) time: 0.1463 data: 0.0645 max mem: 9377 +Train: [70] [6249/6250] eta: 0:00:00 lr: 0.000027 grad: 0.1242 (0.1244) loss: 0.8045 (0.8063) time: 0.1328 data: 0.0431 max mem: 9377 +Train: [70] Total time: 0:15:55 (0.1529 s / it) +Averaged stats: lr: 0.000027 grad: 0.1242 (0.1244) loss: 0.8045 (0.8063) +Eval (hcp-train-subset): [70] [ 0/62] eta: 0:04:08 loss: 0.8260 (0.8260) time: 4.0036 data: 3.8646 max mem: 9377 +Eval (hcp-train-subset): [70] [61/62] eta: 0:00:00 loss: 0.8202 (0.8213) time: 0.1421 data: 0.1170 max mem: 9377 +Eval (hcp-train-subset): [70] Total time: 0:00:15 (0.2454 s / it) +Averaged stats (hcp-train-subset): loss: 0.8202 (0.8213) +Eval (hcp-val): [70] [ 0/62] eta: 0:04:07 loss: 0.8290 (0.8290) time: 3.9931 data: 3.9201 max mem: 9377 +Eval (hcp-val): [70] [61/62] eta: 0:00:00 loss: 0.8307 (0.8327) time: 0.1420 data: 0.1149 max mem: 9377 +Eval (hcp-val): [70] Total time: 0:00:13 (0.2203 s / it) +Averaged stats (hcp-val): loss: 0.8307 (0.8327) +Eval (nsd-val): [70] [ 0/62] eta: 0:05:29 loss: 0.8007 (0.8007) time: 5.3194 data: 5.2884 max mem: 9377 +Eval (nsd-val): [70] [61/62] eta: 0:00:00 loss: 0.8083 (0.8090) time: 0.1279 data: 0.1024 max mem: 9377 +Eval (nsd-val): [70] Total time: 0:00:13 (0.2161 s / it) +Averaged stats (nsd-val): loss: 0.8083 (0.8090) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +Train: [71] [ 0/6250] eta: 9:55:56 lr: 0.000027 grad: 0.1207 (0.1207) loss: 0.8342 (0.8342) time: 5.7210 data: 5.5883 max mem: 9377 +Train: [71] [ 100/6250] eta: 0:20:28 lr: 0.000027 grad: 0.1313 (0.1547) loss: 0.8063 (0.8204) time: 0.1538 data: 0.0482 max mem: 9377 +Train: [71] [ 200/6250] eta: 0:17:29 lr: 0.000027 grad: 0.1307 (0.1443) loss: 0.8155 (0.8161) time: 0.1434 data: 0.0489 max mem: 9377 +Train: [71] [ 300/6250] eta: 0:16:14 lr: 0.000027 grad: 0.1258 (0.1386) loss: 0.8072 (0.8148) time: 0.1405 data: 0.0494 max mem: 9377 +Train: [71] [ 400/6250] eta: 0:15:30 lr: 0.000026 grad: 0.1163 (0.1344) loss: 0.8085 (0.8145) time: 0.1405 data: 0.0512 max mem: 9377 +Train: [71] [ 500/6250] eta: 0:14:43 lr: 0.000026 grad: 0.1127 (0.1317) loss: 0.8148 (0.8145) time: 0.1135 data: 0.0238 max mem: 9377 +Train: [71] [ 600/6250] eta: 0:14:16 lr: 0.000026 grad: 0.1142 (0.1300) loss: 0.8131 (0.8143) time: 0.1498 data: 0.0602 max mem: 9377 +Train: [71] [ 700/6250] eta: 0:13:44 lr: 0.000026 grad: 0.1110 (0.1288) loss: 0.8221 (0.8144) time: 0.1442 data: 0.0544 max mem: 9377 +Train: [71] [ 800/6250] eta: 0:13:31 lr: 0.000026 grad: 0.1222 (0.1280) loss: 0.8139 (0.8143) time: 0.1680 data: 0.0752 max mem: 9377 +Train: [71] [ 900/6250] eta: 0:13:16 lr: 0.000026 grad: 0.1208 (0.1271) loss: 0.8125 (0.8142) time: 0.1439 data: 0.0566 max mem: 9377 +Train: [71] [1000/6250] eta: 0:13:03 lr: 0.000026 grad: 0.1152 (0.1264) loss: 0.8125 (0.8139) time: 0.1456 data: 0.0590 max mem: 9377 +Train: [71] [1100/6250] eta: 0:12:48 lr: 0.000026 grad: 0.1208 (0.1258) loss: 0.8076 (0.8139) time: 0.1483 data: 0.0653 max mem: 9377 +Train: [71] [1200/6250] eta: 0:12:31 lr: 0.000026 grad: 0.1180 (0.1251) loss: 0.8102 (0.8136) time: 0.1433 data: 0.0542 max mem: 9377 +Train: [71] [1300/6250] eta: 0:12:13 lr: 0.000026 grad: 0.1143 (0.1249) loss: 0.8091 (0.8133) time: 0.1455 data: 0.0651 max mem: 9377 +Train: [71] [1400/6250] eta: 0:11:55 lr: 0.000026 grad: 0.1193 (0.1247) loss: 0.8112 (0.8129) time: 0.1470 data: 0.0670 max mem: 9377 +Train: [71] [1500/6250] eta: 0:11:44 lr: 0.000026 grad: 0.1232 (0.1246) loss: 0.8061 (0.8124) time: 0.1793 data: 0.1026 max mem: 9377 +Train: [71] [1600/6250] eta: 0:11:31 lr: 0.000026 grad: 0.1251 (0.1244) loss: 0.8106 (0.8121) time: 0.1396 data: 0.0522 max mem: 9377 +Train: [71] [1700/6250] eta: 0:11:17 lr: 0.000026 grad: 0.1123 (0.1244) loss: 0.8114 (0.8116) time: 0.1745 data: 0.0935 max mem: 9377 +Train: [71] [1800/6250] eta: 0:11:02 lr: 0.000026 grad: 0.1226 (0.1243) loss: 0.8000 (0.8114) time: 0.1588 data: 0.0798 max mem: 9377 +Train: [71] [1900/6250] eta: 0:10:47 lr: 0.000026 grad: 0.1228 (0.1243) loss: 0.8080 (0.8111) time: 0.1516 data: 0.0685 max mem: 9377 +Train: [71] [2000/6250] eta: 0:10:30 lr: 0.000026 grad: 0.1193 (0.1242) loss: 0.8110 (0.8108) time: 0.1214 data: 0.0413 max mem: 9377 +Train: [71] [2100/6250] eta: 0:10:15 lr: 0.000026 grad: 0.1212 (0.1242) loss: 0.8078 (0.8106) time: 0.1465 data: 0.0680 max mem: 9377 +Train: [71] [2200/6250] eta: 0:09:59 lr: 0.000026 grad: 0.1181 (0.1241) loss: 0.8068 (0.8103) time: 0.1517 data: 0.0688 max mem: 9377 +Train: [71] [2300/6250] eta: 0:09:41 lr: 0.000026 grad: 0.1164 (0.1239) loss: 0.8072 (0.8102) time: 0.1353 data: 0.0460 max mem: 9377 +Train: [71] [2400/6250] eta: 0:09:23 lr: 0.000026 grad: 0.1183 (0.1239) loss: 0.8065 (0.8100) time: 0.1188 data: 0.0398 max mem: 9377 +Train: [71] [2500/6250] eta: 0:09:06 lr: 0.000026 grad: 0.1214 (0.1237) loss: 0.8063 (0.8101) time: 0.1212 data: 0.0247 max mem: 9377 +Train: [71] [2600/6250] eta: 0:08:50 lr: 0.000026 grad: 0.1131 (0.1236) loss: 0.8157 (0.8101) time: 0.1293 data: 0.0424 max mem: 9377 +Train: [71] [2700/6250] eta: 0:08:34 lr: 0.000026 grad: 0.1198 (0.1236) loss: 0.8040 (0.8100) time: 0.1190 data: 0.0321 max mem: 9377 +Train: [71] [2800/6250] eta: 0:08:19 lr: 0.000026 grad: 0.1214 (0.1235) loss: 0.8105 (0.8101) time: 0.1388 data: 0.0547 max mem: 9377 +Train: [71] [2900/6250] eta: 0:08:04 lr: 0.000026 grad: 0.1224 (0.1235) loss: 0.8094 (0.8100) time: 0.1451 data: 0.0623 max mem: 9377 +Train: [71] [3000/6250] eta: 0:07:50 lr: 0.000026 grad: 0.1207 (0.1236) loss: 0.8054 (0.8099) time: 0.1438 data: 0.0668 max mem: 9377 +Train: [71] [3100/6250] eta: 0:07:35 lr: 0.000026 grad: 0.1066 (0.1235) loss: 0.8166 (0.8099) time: 0.1201 data: 0.0335 max mem: 9377 +Train: [71] [3200/6250] eta: 0:07:20 lr: 0.000026 grad: 0.1194 (0.1235) loss: 0.8076 (0.8100) time: 0.1249 data: 0.0416 max mem: 9377 +Train: [71] [3300/6250] eta: 0:07:05 lr: 0.000026 grad: 0.1140 (0.1234) loss: 0.8070 (0.8100) time: 0.1313 data: 0.0425 max mem: 9377 +Train: [71] [3400/6250] eta: 0:06:50 lr: 0.000026 grad: 0.1179 (0.1233) loss: 0.8174 (0.8100) time: 0.1463 data: 0.0660 max mem: 9377 +Train: [71] [3500/6250] eta: 0:06:38 lr: 0.000026 grad: 0.1203 (0.1234) loss: 0.8063 (0.8100) time: 0.1440 data: 0.0557 max mem: 9377 +Train: [71] [3600/6250] eta: 0:06:24 lr: 0.000026 grad: 0.1152 (0.1234) loss: 0.8136 (0.8099) time: 0.1315 data: 0.0544 max mem: 9377 +Train: [71] [3700/6250] eta: 0:06:10 lr: 0.000026 grad: 0.1213 (0.1233) loss: 0.8062 (0.8099) time: 0.1635 data: 0.0838 max mem: 9377 +Train: [71] [3800/6250] eta: 0:05:55 lr: 0.000026 grad: 0.1168 (0.1232) loss: 0.8142 (0.8099) time: 0.1656 data: 0.0859 max mem: 9377 +Train: [71] [3900/6250] eta: 0:05:41 lr: 0.000026 grad: 0.1123 (0.1231) loss: 0.8178 (0.8100) time: 0.1323 data: 0.0515 max mem: 9377 +Train: [71] [4000/6250] eta: 0:05:26 lr: 0.000026 grad: 0.1159 (0.1230) loss: 0.8124 (0.8100) time: 0.1222 data: 0.0362 max mem: 9377 +Train: [71] [4100/6250] eta: 0:05:11 lr: 0.000026 grad: 0.1252 (0.1231) loss: 0.8037 (0.8100) time: 0.1190 data: 0.0318 max mem: 9377 +Train: [71] [4200/6250] eta: 0:04:56 lr: 0.000025 grad: 0.1220 (0.1231) loss: 0.8126 (0.8100) time: 0.1436 data: 0.0592 max mem: 9377 +Train: [71] [4300/6250] eta: 0:04:42 lr: 0.000025 grad: 0.1175 (0.1232) loss: 0.8047 (0.8100) time: 0.1191 data: 0.0258 max mem: 9377 +Train: [71] [4400/6250] eta: 0:04:27 lr: 0.000025 grad: 0.1174 (0.1231) loss: 0.8100 (0.8100) time: 0.1466 data: 0.0657 max mem: 9377 +Train: [71] [4500/6250] eta: 0:04:12 lr: 0.000025 grad: 0.1155 (0.1231) loss: 0.8103 (0.8101) time: 0.1418 data: 0.0586 max mem: 9377 +Train: [71] [4600/6250] eta: 0:03:57 lr: 0.000025 grad: 0.1180 (0.1231) loss: 0.8135 (0.8101) time: 0.1293 data: 0.0462 max mem: 9377 +Train: [71] [4700/6250] eta: 0:03:43 lr: 0.000025 grad: 0.1172 (0.1231) loss: 0.8096 (0.8101) time: 0.1368 data: 0.0551 max mem: 9377 +Train: [71] [4800/6250] eta: 0:03:28 lr: 0.000025 grad: 0.1248 (0.1231) loss: 0.8099 (0.8102) time: 0.1580 data: 0.0768 max mem: 9377 +Train: [71] [4900/6250] eta: 0:03:14 lr: 0.000025 grad: 0.1165 (0.1230) loss: 0.8160 (0.8102) time: 0.1486 data: 0.0682 max mem: 9377 +Train: [71] [5000/6250] eta: 0:02:59 lr: 0.000025 grad: 0.1243 (0.1230) loss: 0.8095 (0.8102) time: 0.1378 data: 0.0495 max mem: 9377 +Train: [71] [5100/6250] eta: 0:02:45 lr: 0.000025 grad: 0.1238 (0.1230) loss: 0.8021 (0.8102) time: 0.1304 data: 0.0504 max mem: 9377 +Train: [71] [5200/6250] eta: 0:02:30 lr: 0.000025 grad: 0.1170 (0.1229) loss: 0.8116 (0.8102) time: 0.1442 data: 0.0631 max mem: 9377 +Train: [71] [5300/6250] eta: 0:02:16 lr: 0.000025 grad: 0.1184 (0.1230) loss: 0.8063 (0.8102) time: 0.1225 data: 0.0412 max mem: 9377 +Train: [71] [5400/6250] eta: 0:02:01 lr: 0.000025 grad: 0.1234 (0.1230) loss: 0.8052 (0.8102) time: 0.1160 data: 0.0338 max mem: 9377 +Train: [71] [5500/6250] eta: 0:01:47 lr: 0.000025 grad: 0.1249 (0.1231) loss: 0.8075 (0.8102) time: 0.1433 data: 0.0508 max mem: 9377 +Train: [71] [5600/6250] eta: 0:01:33 lr: 0.000025 grad: 0.1206 (0.1231) loss: 0.8086 (0.8102) time: 0.1268 data: 0.0379 max mem: 9377 +Train: [71] [5700/6250] eta: 0:01:18 lr: 0.000025 grad: 0.1124 (0.1231) loss: 0.8149 (0.8103) time: 0.1246 data: 0.0429 max mem: 9377 +Train: [71] [5800/6250] eta: 0:01:04 lr: 0.000025 grad: 0.1236 (0.1231) loss: 0.8098 (0.8103) time: 0.1438 data: 0.0591 max mem: 9377 +Train: [71] [5900/6250] eta: 0:00:50 lr: 0.000025 grad: 0.1287 (0.1232) loss: 0.8109 (0.8103) time: 0.1388 data: 0.0582 max mem: 9377 +Train: [71] [6000/6250] eta: 0:00:35 lr: 0.000025 grad: 0.1304 (0.1233) loss: 0.8016 (0.8103) time: 0.1509 data: 0.0674 max mem: 9377 +Train: [71] [6100/6250] eta: 0:00:21 lr: 0.000025 grad: 0.1203 (0.1233) loss: 0.8088 (0.8103) time: 0.1265 data: 0.0446 max mem: 9377 +Train: [71] [6200/6250] eta: 0:00:07 lr: 0.000025 grad: 0.1211 (0.1234) loss: 0.8115 (0.8102) time: 0.1238 data: 0.0380 max mem: 9377 +Train: [71] [6249/6250] eta: 0:00:00 lr: 0.000025 grad: 0.1192 (0.1234) loss: 0.8111 (0.8102) time: 0.1408 data: 0.0563 max mem: 9377 +Train: [71] Total time: 0:15:00 (0.1440 s / it) +Averaged stats: lr: 0.000025 grad: 0.1192 (0.1234) loss: 0.8111 (0.8102) +Eval (hcp-train-subset): [71] [ 0/62] eta: 0:04:42 loss: 0.8251 (0.8251) time: 4.5568 data: 4.4488 max mem: 9377 +Eval (hcp-train-subset): [71] [61/62] eta: 0:00:00 loss: 0.8187 (0.8219) time: 0.1654 data: 0.1379 max mem: 9377 +Eval (hcp-train-subset): [71] Total time: 0:00:15 (0.2558 s / it) +Averaged stats (hcp-train-subset): loss: 0.8187 (0.8219) +Eval (hcp-val): [71] [ 0/62] eta: 0:05:22 loss: 0.8296 (0.8296) time: 5.2002 data: 5.1699 max mem: 9377 +Eval (hcp-val): [71] [61/62] eta: 0:00:00 loss: 0.8313 (0.8325) time: 0.1427 data: 0.1174 max mem: 9377 +Eval (hcp-val): [71] Total time: 0:00:14 (0.2379 s / it) +Averaged stats (hcp-val): loss: 0.8313 (0.8325) +Eval (nsd-val): [71] [ 0/62] eta: 0:06:11 loss: 0.7989 (0.7989) time: 5.9905 data: 5.9405 max mem: 9377 +Eval (nsd-val): [71] [61/62] eta: 0:00:00 loss: 0.8083 (0.8094) time: 0.1399 data: 0.1147 max mem: 9377 +Eval (nsd-val): [71] Total time: 0:00:15 (0.2436 s / it) +Averaged stats (nsd-val): loss: 0.8083 (0.8094) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +Train: [72] [ 0/6250] eta: 9:26:05 lr: 0.000025 grad: 0.1088 (0.1088) loss: 0.8688 (0.8688) time: 5.4344 data: 5.0871 max mem: 9377 +Train: [72] [ 100/6250] eta: 0:23:11 lr: 0.000025 grad: 0.1365 (0.1628) loss: 0.8067 (0.8161) time: 0.1758 data: 0.0696 max mem: 9377 +Train: [72] [ 200/6250] eta: 0:19:32 lr: 0.000025 grad: 0.1244 (0.1519) loss: 0.8155 (0.8090) time: 0.1663 data: 0.0631 max mem: 9377 +Train: [72] [ 300/6250] eta: 0:18:17 lr: 0.000025 grad: 0.1203 (0.1448) loss: 0.8019 (0.8072) time: 0.1542 data: 0.0473 max mem: 9377 +Train: [72] [ 400/6250] eta: 0:17:17 lr: 0.000025 grad: 0.1294 (0.1408) loss: 0.8057 (0.8060) time: 0.1599 data: 0.0719 max mem: 9377 +Train: [72] [ 500/6250] eta: 0:16:39 lr: 0.000025 grad: 0.1139 (0.1375) loss: 0.8113 (0.8065) time: 0.1318 data: 0.0420 max mem: 9377 +Train: [72] [ 600/6250] eta: 0:15:55 lr: 0.000025 grad: 0.1197 (0.1350) loss: 0.8129 (0.8075) time: 0.1400 data: 0.0590 max mem: 9377 +Train: [72] [ 700/6250] eta: 0:15:32 lr: 0.000025 grad: 0.1225 (0.1334) loss: 0.8135 (0.8083) time: 0.1660 data: 0.0732 max mem: 9377 +Train: [72] [ 800/6250] eta: 0:15:07 lr: 0.000025 grad: 0.1269 (0.1322) loss: 0.8089 (0.8086) time: 0.1415 data: 0.0567 max mem: 9377 +Train: [72] [ 900/6250] eta: 0:14:45 lr: 0.000025 grad: 0.1235 (0.1314) loss: 0.8104 (0.8090) time: 0.1762 data: 0.0870 max mem: 9377 +Train: [72] [1000/6250] eta: 0:14:24 lr: 0.000025 grad: 0.1234 (0.1307) loss: 0.8095 (0.8091) time: 0.1684 data: 0.0899 max mem: 9377 +Train: [72] [1100/6250] eta: 0:14:02 lr: 0.000025 grad: 0.1210 (0.1300) loss: 0.8083 (0.8093) time: 0.1568 data: 0.0770 max mem: 9377 +Train: [72] [1200/6250] eta: 0:13:39 lr: 0.000025 grad: 0.1232 (0.1297) loss: 0.8084 (0.8092) time: 0.1595 data: 0.0756 max mem: 9377 +Train: [72] [1300/6250] eta: 0:13:21 lr: 0.000025 grad: 0.1229 (0.1293) loss: 0.8140 (0.8092) time: 0.1773 data: 0.0966 max mem: 9377 +Train: [72] [1400/6250] eta: 0:13:04 lr: 0.000025 grad: 0.1271 (0.1288) loss: 0.8079 (0.8093) time: 0.1416 data: 0.0655 max mem: 9377 +Train: [72] [1500/6250] eta: 0:12:47 lr: 0.000025 grad: 0.1253 (0.1285) loss: 0.8060 (0.8091) time: 0.1561 data: 0.0701 max mem: 9377 +Train: [72] [1600/6250] eta: 0:12:26 lr: 0.000025 grad: 0.1249 (0.1284) loss: 0.8041 (0.8090) time: 0.1339 data: 0.0509 max mem: 9377 +Train: [72] [1700/6250] eta: 0:12:07 lr: 0.000024 grad: 0.1155 (0.1279) loss: 0.8161 (0.8092) time: 0.1433 data: 0.0594 max mem: 9377 +Train: [72] [1800/6250] eta: 0:11:48 lr: 0.000024 grad: 0.1248 (0.1277) loss: 0.8107 (0.8092) time: 0.1376 data: 0.0596 max mem: 9377 +Train: [72] [1900/6250] eta: 0:11:28 lr: 0.000024 grad: 0.1186 (0.1274) loss: 0.8097 (0.8092) time: 0.1339 data: 0.0443 max mem: 9377 +Train: [72] [2000/6250] eta: 0:11:08 lr: 0.000024 grad: 0.1165 (0.1272) loss: 0.8119 (0.8094) time: 0.1536 data: 0.0661 max mem: 9377 +Train: [72] [2100/6250] eta: 0:10:48 lr: 0.000024 grad: 0.1256 (0.1270) loss: 0.8037 (0.8094) time: 0.1223 data: 0.0399 max mem: 9377 +Train: [72] [2200/6250] eta: 0:10:29 lr: 0.000024 grad: 0.1160 (0.1267) loss: 0.8062 (0.8096) time: 0.1387 data: 0.0610 max mem: 9377 +Train: [72] [2300/6250] eta: 0:10:09 lr: 0.000024 grad: 0.1218 (0.1265) loss: 0.8078 (0.8096) time: 0.1259 data: 0.0343 max mem: 9377 +Train: [72] [2400/6250] eta: 0:09:50 lr: 0.000024 grad: 0.1181 (0.1263) loss: 0.8094 (0.8096) time: 0.1357 data: 0.0509 max mem: 9377 +Train: [72] [2500/6250] eta: 0:09:33 lr: 0.000024 grad: 0.1177 (0.1262) loss: 0.8095 (0.8095) time: 0.1381 data: 0.0448 max mem: 9377 +Train: [72] [2600/6250] eta: 0:09:16 lr: 0.000024 grad: 0.1154 (0.1261) loss: 0.8124 (0.8096) time: 0.1306 data: 0.0478 max mem: 9377 +Train: [72] [2700/6250] eta: 0:09:00 lr: 0.000024 grad: 0.1244 (0.1262) loss: 0.8091 (0.8096) time: 0.1623 data: 0.0767 max mem: 9377 +Train: [72] [2800/6250] eta: 0:08:44 lr: 0.000024 grad: 0.1203 (0.1262) loss: 0.8027 (0.8095) time: 0.1337 data: 0.0544 max mem: 9377 +Train: [72] [2900/6250] eta: 0:08:28 lr: 0.000024 grad: 0.1226 (0.1262) loss: 0.8125 (0.8094) time: 0.1125 data: 0.0189 max mem: 9377 +Train: [72] [3000/6250] eta: 0:08:11 lr: 0.000024 grad: 0.1338 (0.1263) loss: 0.8056 (0.8092) time: 0.1185 data: 0.0236 max mem: 9377 +Train: [72] [3100/6250] eta: 0:07:55 lr: 0.000024 grad: 0.1239 (0.1263) loss: 0.8116 (0.8091) time: 0.1469 data: 0.0698 max mem: 9377 +Train: [72] [3200/6250] eta: 0:07:39 lr: 0.000024 grad: 0.1277 (0.1263) loss: 0.7963 (0.8090) time: 0.1502 data: 0.0551 max mem: 9377 +Train: [72] [3300/6250] eta: 0:07:27 lr: 0.000024 grad: 0.1257 (0.1264) loss: 0.8089 (0.8088) time: 0.1835 data: 0.0955 max mem: 9377 +Train: [72] [3400/6250] eta: 0:07:13 lr: 0.000024 grad: 0.1185 (0.1264) loss: 0.8088 (0.8087) time: 0.2079 data: 0.1189 max mem: 9377 +Train: [72] [3500/6250] eta: 0:06:58 lr: 0.000024 grad: 0.1244 (0.1265) loss: 0.8024 (0.8085) time: 0.1874 data: 0.1115 max mem: 9377 +Train: [72] [3600/6250] eta: 0:06:43 lr: 0.000024 grad: 0.1246 (0.1265) loss: 0.8015 (0.8084) time: 0.1568 data: 0.0773 max mem: 9377 +Train: [72] [3700/6250] eta: 0:06:29 lr: 0.000024 grad: 0.1262 (0.1266) loss: 0.8017 (0.8083) time: 0.1252 data: 0.0475 max mem: 9377 +Train: [72] [3800/6250] eta: 0:06:13 lr: 0.000024 grad: 0.1204 (0.1266) loss: 0.8048 (0.8082) time: 0.1406 data: 0.0519 max mem: 9377 +Train: [72] [3900/6250] eta: 0:05:58 lr: 0.000024 grad: 0.1213 (0.1265) loss: 0.8108 (0.8081) time: 0.1305 data: 0.0442 max mem: 9377 +Train: [72] [4000/6250] eta: 0:05:42 lr: 0.000024 grad: 0.1234 (0.1267) loss: 0.8094 (0.8080) time: 0.1500 data: 0.0672 max mem: 9377 +Train: [72] [4100/6250] eta: 0:05:26 lr: 0.000024 grad: 0.1230 (0.1267) loss: 0.8118 (0.8079) time: 0.1377 data: 0.0448 max mem: 9377 +Train: [72] [4200/6250] eta: 0:05:10 lr: 0.000024 grad: 0.1377 (0.1267) loss: 0.8009 (0.8079) time: 0.1498 data: 0.0644 max mem: 9377 +Train: [72] [4300/6250] eta: 0:04:54 lr: 0.000024 grad: 0.1201 (0.1268) loss: 0.8129 (0.8078) time: 0.1496 data: 0.0654 max mem: 9377 +Train: [72] [4400/6250] eta: 0:04:39 lr: 0.000024 grad: 0.1237 (0.1268) loss: 0.8067 (0.8078) time: 0.1367 data: 0.0540 max mem: 9377 +Train: [72] [4500/6250] eta: 0:04:23 lr: 0.000024 grad: 0.1260 (0.1268) loss: 0.8065 (0.8077) time: 0.1089 data: 0.0182 max mem: 9377 +Train: [72] [4600/6250] eta: 0:04:08 lr: 0.000024 grad: 0.1293 (0.1269) loss: 0.8027 (0.8076) time: 0.1477 data: 0.0698 max mem: 9377 +Train: [72] [4700/6250] eta: 0:03:52 lr: 0.000024 grad: 0.1335 (0.1270) loss: 0.8058 (0.8075) time: 0.1433 data: 0.0632 max mem: 9377 +Train: [72] [4800/6250] eta: 0:03:37 lr: 0.000024 grad: 0.1273 (0.1271) loss: 0.8019 (0.8074) time: 0.1199 data: 0.0346 max mem: 9377 +Train: [72] [4900/6250] eta: 0:03:22 lr: 0.000024 grad: 0.1270 (0.1272) loss: 0.8071 (0.8074) time: 0.1573 data: 0.0726 max mem: 9377 +Train: [72] [5000/6250] eta: 0:03:07 lr: 0.000024 grad: 0.1367 (0.1272) loss: 0.8015 (0.8073) time: 0.1359 data: 0.0556 max mem: 9377 +Train: [72] [5100/6250] eta: 0:02:52 lr: 0.000024 grad: 0.1321 (0.1273) loss: 0.8058 (0.8072) time: 0.1447 data: 0.0635 max mem: 9377 +Train: [72] [5200/6250] eta: 0:02:37 lr: 0.000024 grad: 0.1242 (0.1274) loss: 0.8028 (0.8071) time: 0.1553 data: 0.0665 max mem: 9377 +Train: [72] [5300/6250] eta: 0:02:22 lr: 0.000024 grad: 0.1291 (0.1276) loss: 0.8055 (0.8070) time: 0.1243 data: 0.0399 max mem: 9377 +Train: [72] [5400/6250] eta: 0:02:07 lr: 0.000024 grad: 0.1429 (0.1277) loss: 0.7999 (0.8070) time: 0.1535 data: 0.0749 max mem: 9377 +Train: [72] [5500/6250] eta: 0:01:52 lr: 0.000023 grad: 0.1288 (0.1278) loss: 0.8020 (0.8069) time: 0.1279 data: 0.0414 max mem: 9377 +Train: [72] [5600/6250] eta: 0:01:37 lr: 0.000023 grad: 0.1297 (0.1279) loss: 0.8044 (0.8068) time: 0.1631 data: 0.0859 max mem: 9377 +Train: [72] [5700/6250] eta: 0:01:22 lr: 0.000023 grad: 0.1236 (0.1279) loss: 0.8027 (0.8068) time: 0.1568 data: 0.0705 max mem: 9377 +Train: [72] [5800/6250] eta: 0:01:07 lr: 0.000023 grad: 0.1266 (0.1280) loss: 0.8040 (0.8068) time: 0.1535 data: 0.0766 max mem: 9377 +Train: [72] [5900/6250] eta: 0:00:52 lr: 0.000023 grad: 0.1276 (0.1280) loss: 0.8088 (0.8068) time: 0.1433 data: 0.0627 max mem: 9377 +Train: [72] [6000/6250] eta: 0:00:37 lr: 0.000023 grad: 0.1230 (0.1280) loss: 0.7962 (0.8067) time: 0.1506 data: 0.0656 max mem: 9377 +Train: [72] [6100/6250] eta: 0:00:22 lr: 0.000023 grad: 0.1224 (0.1280) loss: 0.8097 (0.8067) time: 0.1291 data: 0.0360 max mem: 9377 +Train: [72] [6200/6250] eta: 0:00:07 lr: 0.000023 grad: 0.1252 (0.1280) loss: 0.8063 (0.8068) time: 0.1385 data: 0.0573 max mem: 9377 +Train: [72] [6249/6250] eta: 0:00:00 lr: 0.000023 grad: 0.1207 (0.1279) loss: 0.8117 (0.8069) time: 0.1418 data: 0.0598 max mem: 9377 +Train: [72] Total time: 0:15:36 (0.1499 s / it) +Averaged stats: lr: 0.000023 grad: 0.1207 (0.1279) loss: 0.8117 (0.8069) +Eval (hcp-train-subset): [72] [ 0/62] eta: 0:06:35 loss: 0.8240 (0.8240) time: 6.3781 data: 6.3457 max mem: 9377 +Eval (hcp-train-subset): [72] [61/62] eta: 0:00:00 loss: 0.8189 (0.8208) time: 0.1148 data: 0.0877 max mem: 9377 +Eval (hcp-train-subset): [72] Total time: 0:00:14 (0.2384 s / it) +Averaged stats (hcp-train-subset): loss: 0.8189 (0.8208) +Eval (hcp-val): [72] [ 0/62] eta: 0:05:55 loss: 0.8317 (0.8317) time: 5.7352 data: 5.7038 max mem: 9377 +Eval (hcp-val): [72] [61/62] eta: 0:00:00 loss: 0.8315 (0.8323) time: 0.1142 data: 0.0872 max mem: 9377 +Eval (hcp-val): [72] Total time: 0:00:13 (0.2224 s / it) +Averaged stats (hcp-val): loss: 0.8315 (0.8323) +Eval (nsd-val): [72] [ 0/62] eta: 0:05:13 loss: 0.8016 (0.8016) time: 5.0638 data: 5.0333 max mem: 9377 +Eval (nsd-val): [72] [61/62] eta: 0:00:00 loss: 0.8080 (0.8101) time: 0.1198 data: 0.0943 max mem: 9377 +Eval (nsd-val): [72] Total time: 0:00:13 (0.2213 s / it) +Averaged stats (nsd-val): loss: 0.8080 (0.8101) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +Train: [73] [ 0/6250] eta: 7:07:28 lr: 0.000023 grad: 0.0708 (0.0708) loss: 0.8450 (0.8450) time: 4.1038 data: 3.8723 max mem: 9377 +Train: [73] [ 100/6250] eta: 0:20:14 lr: 0.000023 grad: 0.1349 (0.1631) loss: 0.8135 (0.8198) time: 0.1478 data: 0.0453 max mem: 9377 +Train: [73] [ 200/6250] eta: 0:18:01 lr: 0.000023 grad: 0.1561 (0.1580) loss: 0.7955 (0.8118) time: 0.1741 data: 0.0784 max mem: 9377 +Train: [73] [ 300/6250] eta: 0:16:59 lr: 0.000023 grad: 0.1327 (0.1530) loss: 0.8105 (0.8080) time: 0.1369 data: 0.0427 max mem: 9377 +Train: [73] [ 400/6250] eta: 0:16:37 lr: 0.000023 grad: 0.1279 (0.1489) loss: 0.8001 (0.8080) time: 0.1589 data: 0.0670 max mem: 9377 +Train: [73] [ 500/6250] eta: 0:15:45 lr: 0.000023 grad: 0.1375 (0.1467) loss: 0.8068 (0.8070) time: 0.1391 data: 0.0452 max mem: 9377 +Train: [73] [ 600/6250] eta: 0:15:02 lr: 0.000023 grad: 0.1246 (0.1435) loss: 0.8146 (0.8075) time: 0.1267 data: 0.0397 max mem: 9377 +Train: [73] [ 700/6250] eta: 0:14:36 lr: 0.000023 grad: 0.1254 (0.1411) loss: 0.8071 (0.8078) time: 0.1379 data: 0.0493 max mem: 9377 +Train: [73] [ 800/6250] eta: 0:14:10 lr: 0.000023 grad: 0.1159 (0.1387) loss: 0.8146 (0.8083) time: 0.1363 data: 0.0475 max mem: 9377 +Train: [73] [ 900/6250] eta: 0:13:49 lr: 0.000023 grad: 0.1216 (0.1370) loss: 0.8109 (0.8090) time: 0.1538 data: 0.0668 max mem: 9377 +Train: [73] [1000/6250] eta: 0:13:25 lr: 0.000023 grad: 0.1205 (0.1363) loss: 0.8131 (0.8092) time: 0.1288 data: 0.0409 max mem: 9377 +Train: [73] [1100/6250] eta: 0:13:05 lr: 0.000023 grad: 0.1249 (0.1353) loss: 0.8121 (0.8093) time: 0.1501 data: 0.0686 max mem: 9377 +Train: [73] [1200/6250] eta: 0:12:51 lr: 0.000023 grad: 0.1213 (0.1343) loss: 0.8089 (0.8094) time: 0.1455 data: 0.0650 max mem: 9377 +Train: [73] [1300/6250] eta: 0:12:33 lr: 0.000023 grad: 0.1251 (0.1334) loss: 0.8078 (0.8095) time: 0.1531 data: 0.0671 max mem: 9377 +Train: [73] [1400/6250] eta: 0:12:19 lr: 0.000023 grad: 0.1214 (0.1328) loss: 0.8119 (0.8096) time: 0.1596 data: 0.0739 max mem: 9377 +Train: [73] [1500/6250] eta: 0:12:00 lr: 0.000023 grad: 0.1209 (0.1321) loss: 0.8120 (0.8095) time: 0.1410 data: 0.0557 max mem: 9377 +Train: [73] [1600/6250] eta: 0:11:42 lr: 0.000023 grad: 0.1192 (0.1315) loss: 0.8181 (0.8096) time: 0.1305 data: 0.0469 max mem: 9377 +Train: [73] [1700/6250] eta: 0:11:24 lr: 0.000023 grad: 0.1136 (0.1310) loss: 0.8121 (0.8097) time: 0.1379 data: 0.0488 max mem: 9377 +Train: [73] [1800/6250] eta: 0:11:06 lr: 0.000023 grad: 0.1280 (0.1307) loss: 0.8046 (0.8096) time: 0.1421 data: 0.0575 max mem: 9377 +Train: [73] [1900/6250] eta: 0:10:48 lr: 0.000023 grad: 0.1195 (0.1303) loss: 0.8131 (0.8096) time: 0.1467 data: 0.0536 max mem: 9377 +Train: [73] [2000/6250] eta: 0:10:30 lr: 0.000023 grad: 0.1229 (0.1301) loss: 0.8104 (0.8096) time: 0.1332 data: 0.0529 max mem: 9377 +Train: [73] [2100/6250] eta: 0:10:12 lr: 0.000023 grad: 0.1220 (0.1298) loss: 0.8075 (0.8096) time: 0.1224 data: 0.0324 max mem: 9377 +Train: [73] [2200/6250] eta: 0:09:56 lr: 0.000023 grad: 0.1166 (0.1295) loss: 0.8075 (0.8096) time: 0.1627 data: 0.0797 max mem: 9377 +Train: [73] [2300/6250] eta: 0:09:38 lr: 0.000023 grad: 0.1226 (0.1293) loss: 0.8100 (0.8095) time: 0.1323 data: 0.0452 max mem: 9377 +Train: [73] [2400/6250] eta: 0:09:22 lr: 0.000023 grad: 0.1206 (0.1292) loss: 0.8091 (0.8094) time: 0.1442 data: 0.0586 max mem: 9377 +Train: [73] [2500/6250] eta: 0:09:07 lr: 0.000023 grad: 0.1248 (0.1290) loss: 0.8100 (0.8093) time: 0.1545 data: 0.0709 max mem: 9377 +Train: [73] [2600/6250] eta: 0:08:51 lr: 0.000023 grad: 0.1170 (0.1289) loss: 0.8089 (0.8091) time: 0.1259 data: 0.0368 max mem: 9377 +Train: [73] [2700/6250] eta: 0:08:36 lr: 0.000023 grad: 0.1224 (0.1288) loss: 0.8070 (0.8090) time: 0.1399 data: 0.0569 max mem: 9377 +Train: [73] [2800/6250] eta: 0:08:22 lr: 0.000023 grad: 0.1196 (0.1287) loss: 0.8070 (0.8089) time: 0.1507 data: 0.0682 max mem: 9377 +Train: [73] [2900/6250] eta: 0:08:06 lr: 0.000023 grad: 0.1249 (0.1287) loss: 0.8096 (0.8089) time: 0.1478 data: 0.0667 max mem: 9377 +Train: [73] [3000/6250] eta: 0:07:51 lr: 0.000023 grad: 0.1261 (0.1287) loss: 0.8131 (0.8089) time: 0.1279 data: 0.0424 max mem: 9377 +Train: [73] [3100/6250] eta: 0:07:36 lr: 0.000023 grad: 0.1173 (0.1287) loss: 0.8145 (0.8090) time: 0.1629 data: 0.0857 max mem: 9377 +Train: [73] [3200/6250] eta: 0:07:24 lr: 0.000022 grad: 0.1129 (0.1286) loss: 0.8215 (0.8090) time: 0.1823 data: 0.0907 max mem: 9377 +Train: [73] [3300/6250] eta: 0:07:11 lr: 0.000022 grad: 0.1284 (0.1286) loss: 0.8105 (0.8090) time: 0.1724 data: 0.0892 max mem: 9377 +Train: [73] [3400/6250] eta: 0:06:56 lr: 0.000022 grad: 0.1234 (0.1285) loss: 0.8102 (0.8091) time: 0.1428 data: 0.0623 max mem: 9377 +Train: [73] [3500/6250] eta: 0:06:41 lr: 0.000022 grad: 0.1257 (0.1285) loss: 0.8049 (0.8091) time: 0.1489 data: 0.0654 max mem: 9377 +Train: [73] [3600/6250] eta: 0:06:27 lr: 0.000022 grad: 0.1217 (0.1283) loss: 0.8145 (0.8091) time: 0.1416 data: 0.0605 max mem: 9377 +Train: [73] [3700/6250] eta: 0:06:12 lr: 0.000022 grad: 0.1228 (0.1282) loss: 0.8049 (0.8091) time: 0.1494 data: 0.0670 max mem: 9377 +Train: [73] [3800/6250] eta: 0:05:58 lr: 0.000022 grad: 0.1180 (0.1281) loss: 0.8069 (0.8091) time: 0.1437 data: 0.0541 max mem: 9377 +Train: [73] [3900/6250] eta: 0:05:43 lr: 0.000022 grad: 0.1177 (0.1280) loss: 0.8124 (0.8091) time: 0.1538 data: 0.0694 max mem: 9377 +Train: [73] [4000/6250] eta: 0:05:29 lr: 0.000022 grad: 0.1254 (0.1279) loss: 0.8090 (0.8092) time: 0.1441 data: 0.0569 max mem: 9377 +Train: [73] [4100/6250] eta: 0:05:14 lr: 0.000022 grad: 0.1253 (0.1278) loss: 0.8105 (0.8092) time: 0.1420 data: 0.0623 max mem: 9377 +Train: [73] [4200/6250] eta: 0:04:59 lr: 0.000022 grad: 0.1177 (0.1278) loss: 0.8132 (0.8093) time: 0.1405 data: 0.0618 max mem: 9377 +Train: [73] [4300/6250] eta: 0:04:44 lr: 0.000022 grad: 0.1184 (0.1276) loss: 0.8147 (0.8093) time: 0.1477 data: 0.0599 max mem: 9377 +Train: [73] [4400/6250] eta: 0:04:30 lr: 0.000022 grad: 0.1218 (0.1275) loss: 0.8124 (0.8094) time: 0.1300 data: 0.0400 max mem: 9377 +Train: [73] [4500/6250] eta: 0:04:16 lr: 0.000022 grad: 0.1152 (0.1274) loss: 0.8125 (0.8095) time: 0.1508 data: 0.0706 max mem: 9377 +Train: [73] [4600/6250] eta: 0:04:02 lr: 0.000022 grad: 0.1213 (0.1274) loss: 0.8137 (0.8096) time: 0.1645 data: 0.0836 max mem: 9377 +Train: [73] [4700/6250] eta: 0:03:48 lr: 0.000022 grad: 0.1246 (0.1274) loss: 0.8125 (0.8097) time: 0.1632 data: 0.0835 max mem: 9377 +Train: [73] [4800/6250] eta: 0:03:33 lr: 0.000022 grad: 0.1232 (0.1274) loss: 0.8030 (0.8097) time: 0.1615 data: 0.0773 max mem: 9377 +Train: [73] [4900/6250] eta: 0:03:19 lr: 0.000022 grad: 0.1223 (0.1274) loss: 0.8101 (0.8097) time: 0.1686 data: 0.0928 max mem: 9377 +Train: [73] [5000/6250] eta: 0:03:04 lr: 0.000022 grad: 0.1262 (0.1274) loss: 0.8137 (0.8097) time: 0.1498 data: 0.0657 max mem: 9377 +Train: [73] [5100/6250] eta: 0:02:50 lr: 0.000022 grad: 0.1198 (0.1273) loss: 0.8159 (0.8097) time: 0.1700 data: 0.0940 max mem: 9377 +Train: [73] [5200/6250] eta: 0:02:35 lr: 0.000022 grad: 0.1217 (0.1273) loss: 0.8065 (0.8097) time: 0.1404 data: 0.0572 max mem: 9377 +Train: [73] [5300/6250] eta: 0:02:20 lr: 0.000022 grad: 0.1198 (0.1273) loss: 0.8152 (0.8097) time: 0.1502 data: 0.0688 max mem: 9377 +Train: [73] [5400/6250] eta: 0:02:05 lr: 0.000022 grad: 0.1255 (0.1274) loss: 0.8048 (0.8097) time: 0.1391 data: 0.0588 max mem: 9377 +Train: [73] [5500/6250] eta: 0:01:50 lr: 0.000022 grad: 0.1194 (0.1275) loss: 0.8011 (0.8096) time: 0.1290 data: 0.0460 max mem: 9377 +Train: [73] [5600/6250] eta: 0:01:35 lr: 0.000022 grad: 0.1283 (0.1275) loss: 0.8077 (0.8095) time: 0.1634 data: 0.0836 max mem: 9377 +Train: [73] [5700/6250] eta: 0:01:21 lr: 0.000022 grad: 0.1244 (0.1275) loss: 0.8028 (0.8095) time: 0.1396 data: 0.0570 max mem: 9377 +Train: [73] [5800/6250] eta: 0:01:06 lr: 0.000022 grad: 0.1286 (0.1275) loss: 0.8086 (0.8094) time: 0.1540 data: 0.0735 max mem: 9377 +Train: [73] [5900/6250] eta: 0:00:51 lr: 0.000022 grad: 0.1195 (0.1275) loss: 0.8097 (0.8094) time: 0.1568 data: 0.0685 max mem: 9377 +Train: [73] [6000/6250] eta: 0:00:36 lr: 0.000022 grad: 0.1312 (0.1275) loss: 0.8066 (0.8094) time: 0.1545 data: 0.0772 max mem: 9377 +Train: [73] [6100/6250] eta: 0:00:22 lr: 0.000022 grad: 0.1282 (0.1275) loss: 0.8045 (0.8093) time: 0.1345 data: 0.0461 max mem: 9377 +Train: [73] [6200/6250] eta: 0:00:07 lr: 0.000022 grad: 0.1212 (0.1275) loss: 0.8077 (0.8093) time: 0.1035 data: 0.0193 max mem: 9377 +Train: [73] [6249/6250] eta: 0:00:00 lr: 0.000022 grad: 0.1234 (0.1275) loss: 0.7980 (0.8093) time: 0.1532 data: 0.0718 max mem: 9377 +Train: [73] Total time: 0:15:27 (0.1484 s / it) +Averaged stats: lr: 0.000022 grad: 0.1234 (0.1275) loss: 0.7980 (0.8093) +Eval (hcp-train-subset): [73] [ 0/62] eta: 0:06:40 loss: 0.8263 (0.8263) time: 6.4664 data: 6.4339 max mem: 9377 +Eval (hcp-train-subset): [73] [61/62] eta: 0:00:00 loss: 0.8143 (0.8195) time: 0.1248 data: 0.0992 max mem: 9377 +Eval (hcp-train-subset): [73] Total time: 0:00:15 (0.2432 s / it) +Averaged stats (hcp-train-subset): loss: 0.8143 (0.8195) +Eval (hcp-val): [73] [ 0/62] eta: 0:05:39 loss: 0.8317 (0.8317) time: 5.4813 data: 5.4510 max mem: 9377 +Eval (hcp-val): [73] [61/62] eta: 0:00:00 loss: 0.8304 (0.8317) time: 0.1182 data: 0.0930 max mem: 9377 +Eval (hcp-val): [73] Total time: 0:00:13 (0.2162 s / it) +Averaged stats (hcp-val): loss: 0.8304 (0.8317) +Eval (nsd-val): [73] [ 0/62] eta: 0:05:38 loss: 0.8019 (0.8019) time: 5.4555 data: 5.4233 max mem: 9377 +Eval (nsd-val): [73] [61/62] eta: 0:00:00 loss: 0.8079 (0.8104) time: 0.1327 data: 0.1076 max mem: 9377 +Eval (nsd-val): [73] Total time: 0:00:13 (0.2187 s / it) +Averaged stats (nsd-val): loss: 0.8079 (0.8104) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +Train: [74] [ 0/6250] eta: 9:52:33 lr: 0.000022 grad: nan (nan) loss: 0.8464 (0.8464) time: 5.6885 data: 5.5833 max mem: 9377 +Train: [74] [ 100/6250] eta: 0:20:17 lr: 0.000022 grad: 0.1370 (0.1550) loss: 0.8112 (0.8184) time: 0.1345 data: 0.0361 max mem: 9377 +Train: [74] [ 200/6250] eta: 0:17:46 lr: 0.000022 grad: 0.1191 (0.1463) loss: 0.8219 (0.8177) time: 0.1420 data: 0.0467 max mem: 9377 +Train: [74] [ 300/6250] eta: 0:16:40 lr: 0.000022 grad: 0.1226 (0.1434) loss: 0.8180 (0.8156) time: 0.1441 data: 0.0593 max mem: 9377 +Train: [74] [ 400/6250] eta: 0:15:50 lr: 0.000022 grad: 0.1236 (0.1397) loss: 0.8202 (0.8154) time: 0.1327 data: 0.0380 max mem: 9377 +Train: [74] [ 500/6250] eta: 0:15:06 lr: 0.000022 grad: 0.1236 (0.1368) loss: 0.8105 (0.8152) time: 0.1300 data: 0.0355 max mem: 9377 +Train: [74] [ 600/6250] eta: 0:14:40 lr: 0.000022 grad: 0.1272 (0.1345) loss: 0.8137 (0.8148) time: 0.1262 data: 0.0425 max mem: 9377 +Train: [74] [ 700/6250] eta: 0:14:12 lr: 0.000022 grad: 0.1179 (0.1332) loss: 0.8157 (0.8145) time: 0.1513 data: 0.0567 max mem: 9377 +Train: [74] [ 800/6250] eta: 0:13:45 lr: 0.000022 grad: 0.1231 (0.1319) loss: 0.8090 (0.8142) time: 0.1367 data: 0.0475 max mem: 9377 +Train: [74] [ 900/6250] eta: 0:13:41 lr: 0.000021 grad: 0.1323 (0.1316) loss: 0.8058 (0.8138) time: 0.2447 data: 0.1543 max mem: 9377 +Train: [74] [1000/6250] eta: 0:13:34 lr: 0.000021 grad: 0.1246 (0.1313) loss: 0.8100 (0.8138) time: 0.1375 data: 0.0514 max mem: 9377 +Train: [74] [1100/6250] eta: 0:13:23 lr: 0.000021 grad: 0.1238 (0.1307) loss: 0.8055 (0.8134) time: 0.1603 data: 0.0727 max mem: 9377 +Train: [74] [1200/6250] eta: 0:13:09 lr: 0.000021 grad: 0.1170 (0.1302) loss: 0.8093 (0.8131) time: 0.1512 data: 0.0664 max mem: 9377 +Train: [74] [1300/6250] eta: 0:12:55 lr: 0.000021 grad: 0.1159 (0.1299) loss: 0.8062 (0.8130) time: 0.1520 data: 0.0725 max mem: 9377 +Train: [74] [1400/6250] eta: 0:12:41 lr: 0.000021 grad: 0.1229 (0.1298) loss: 0.8038 (0.8125) time: 0.1634 data: 0.0777 max mem: 9377 +Train: [74] [1500/6250] eta: 0:12:26 lr: 0.000021 grad: 0.1230 (0.1296) loss: 0.8100 (0.8122) time: 0.1660 data: 0.0798 max mem: 9377 +Train: [74] [1600/6250] eta: 0:12:10 lr: 0.000021 grad: 0.1249 (0.1294) loss: 0.8108 (0.8120) time: 0.1591 data: 0.0707 max mem: 9377 +Train: [74] [1700/6250] eta: 0:11:53 lr: 0.000021 grad: 0.1228 (0.1292) loss: 0.8053 (0.8119) time: 0.1710 data: 0.0858 max mem: 9377 +Train: [74] [1800/6250] eta: 0:11:33 lr: 0.000021 grad: 0.1270 (0.1292) loss: 0.8129 (0.8117) time: 0.1438 data: 0.0586 max mem: 9377 +Train: [74] [1900/6250] eta: 0:11:16 lr: 0.000021 grad: 0.1251 (0.1291) loss: 0.7998 (0.8115) time: 0.1570 data: 0.0773 max mem: 9377 +Train: [74] [2000/6250] eta: 0:10:58 lr: 0.000021 grad: 0.1210 (0.1290) loss: 0.8087 (0.8114) time: 0.1407 data: 0.0608 max mem: 9377 +Train: [74] [2100/6250] eta: 0:10:41 lr: 0.000021 grad: 0.1243 (0.1290) loss: 0.8010 (0.8112) time: 0.1423 data: 0.0637 max mem: 9377 +Train: [74] [2200/6250] eta: 0:10:27 lr: 0.000021 grad: 0.1212 (0.1290) loss: 0.8026 (0.8110) time: 0.1674 data: 0.0864 max mem: 9377 +Train: [74] [2300/6250] eta: 0:10:13 lr: 0.000021 grad: 0.1274 (0.1288) loss: 0.8101 (0.8110) time: 0.1573 data: 0.0802 max mem: 9377 +Train: [74] [2400/6250] eta: 0:09:57 lr: 0.000021 grad: 0.1232 (0.1288) loss: 0.8067 (0.8108) time: 0.1552 data: 0.0745 max mem: 9377 +Train: [74] [2500/6250] eta: 0:09:43 lr: 0.000021 grad: 0.1140 (0.1287) loss: 0.8151 (0.8107) time: 0.1703 data: 0.0934 max mem: 9377 +Train: [74] [2600/6250] eta: 0:09:28 lr: 0.000021 grad: 0.1279 (0.1287) loss: 0.8051 (0.8106) time: 0.1614 data: 0.0789 max mem: 9377 +Train: [74] [2700/6250] eta: 0:09:13 lr: 0.000021 grad: 0.1223 (0.1286) loss: 0.8046 (0.8105) time: 0.1614 data: 0.0777 max mem: 9377 +Train: [74] [2800/6250] eta: 0:08:57 lr: 0.000021 grad: 0.1233 (0.1285) loss: 0.8125 (0.8105) time: 0.1514 data: 0.0609 max mem: 9377 +Train: [74] [2900/6250] eta: 0:08:44 lr: 0.000021 grad: 0.1295 (0.1286) loss: 0.8106 (0.8105) time: 0.1512 data: 0.0638 max mem: 9377 +Train: [74] [3000/6250] eta: 0:08:28 lr: 0.000021 grad: 0.1156 (0.1286) loss: 0.8091 (0.8104) time: 0.1515 data: 0.0690 max mem: 9377 +Train: [74] [3100/6250] eta: 0:08:13 lr: 0.000021 grad: 0.1217 (0.1287) loss: 0.8087 (0.8103) time: 0.1453 data: 0.0565 max mem: 9377 +Train: [74] [3200/6250] eta: 0:07:57 lr: 0.000021 grad: 0.1247 (0.1287) loss: 0.8088 (0.8103) time: 0.1631 data: 0.0794 max mem: 9377 +Train: [74] [3300/6250] eta: 0:07:41 lr: 0.000021 grad: 0.1290 (0.1286) loss: 0.8112 (0.8103) time: 0.1372 data: 0.0484 max mem: 9377 +Train: [74] [3400/6250] eta: 0:07:24 lr: 0.000021 grad: 0.1281 (0.1287) loss: 0.8128 (0.8103) time: 0.1351 data: 0.0517 max mem: 9377 +Train: [74] [3500/6250] eta: 0:07:07 lr: 0.000021 grad: 0.1325 (0.1288) loss: 0.8092 (0.8102) time: 0.1446 data: 0.0580 max mem: 9377 +Train: [74] [3600/6250] eta: 0:06:50 lr: 0.000021 grad: 0.1272 (0.1289) loss: 0.8079 (0.8102) time: 0.1404 data: 0.0546 max mem: 9377 +Train: [74] [3700/6250] eta: 0:06:33 lr: 0.000021 grad: 0.1367 (0.1288) loss: 0.8069 (0.8102) time: 0.1192 data: 0.0317 max mem: 9377 +Train: [74] [3800/6250] eta: 0:06:18 lr: 0.000021 grad: 0.1338 (0.1290) loss: 0.8080 (0.8102) time: 0.2096 data: 0.1262 max mem: 9377 +Train: [74] [3900/6250] eta: 0:06:01 lr: 0.000021 grad: 0.1269 (0.1290) loss: 0.8027 (0.8101) time: 0.1426 data: 0.0615 max mem: 9377 +Train: [74] [4000/6250] eta: 0:05:46 lr: 0.000021 grad: 0.1274 (0.1291) loss: 0.8112 (0.8101) time: 0.1404 data: 0.0534 max mem: 9377 +Train: [74] [4100/6250] eta: 0:05:30 lr: 0.000021 grad: 0.1298 (0.1291) loss: 0.8064 (0.8101) time: 0.1513 data: 0.0766 max mem: 9377 +Train: [74] [4200/6250] eta: 0:05:14 lr: 0.000021 grad: 0.1274 (0.1291) loss: 0.8025 (0.8101) time: 0.1295 data: 0.0460 max mem: 9377 +Train: [74] [4300/6250] eta: 0:04:57 lr: 0.000021 grad: 0.1202 (0.1291) loss: 0.8112 (0.8101) time: 0.1420 data: 0.0601 max mem: 9377 +Train: [74] [4400/6250] eta: 0:04:42 lr: 0.000021 grad: 0.1219 (0.1291) loss: 0.8094 (0.8101) time: 0.1296 data: 0.0400 max mem: 9377 +Train: [74] [4500/6250] eta: 0:04:26 lr: 0.000021 grad: 0.1275 (0.1290) loss: 0.8097 (0.8101) time: 0.1417 data: 0.0545 max mem: 9377 +Train: [74] [4600/6250] eta: 0:04:10 lr: 0.000021 grad: 0.1270 (0.1290) loss: 0.8115 (0.8101) time: 0.1587 data: 0.0768 max mem: 9377 +Train: [74] [4700/6250] eta: 0:03:55 lr: 0.000021 grad: 0.1172 (0.1290) loss: 0.8094 (0.8101) time: 0.1429 data: 0.0620 max mem: 9377 +Train: [74] [4800/6250] eta: 0:03:39 lr: 0.000021 grad: 0.1275 (0.1290) loss: 0.8125 (0.8102) time: 0.1598 data: 0.0688 max mem: 9377 +Train: [74] [4900/6250] eta: 0:03:24 lr: 0.000020 grad: 0.1242 (0.1289) loss: 0.8086 (0.8102) time: 0.1436 data: 0.0638 max mem: 9377 +Train: [74] [5000/6250] eta: 0:03:09 lr: 0.000020 grad: 0.1251 (0.1289) loss: 0.8125 (0.8102) time: 0.1416 data: 0.0653 max mem: 9377 +Train: [74] [5100/6250] eta: 0:02:53 lr: 0.000020 grad: 0.1225 (0.1288) loss: 0.8131 (0.8102) time: 0.1262 data: 0.0357 max mem: 9377 +Train: [74] [5200/6250] eta: 0:02:38 lr: 0.000020 grad: 0.1216 (0.1288) loss: 0.8169 (0.8102) time: 0.1399 data: 0.0612 max mem: 9377 +Train: [74] [5300/6250] eta: 0:02:23 lr: 0.000020 grad: 0.1150 (0.1288) loss: 0.8129 (0.8101) time: 0.1260 data: 0.0451 max mem: 9377 +Train: [74] [5400/6250] eta: 0:02:07 lr: 0.000020 grad: 0.1299 (0.1289) loss: 0.8013 (0.8101) time: 0.1485 data: 0.0512 max mem: 9377 +Train: [74] [5500/6250] eta: 0:01:52 lr: 0.000020 grad: 0.1339 (0.1289) loss: 0.8057 (0.8100) time: 0.1489 data: 0.0682 max mem: 9377 +Train: [74] [5600/6250] eta: 0:01:37 lr: 0.000020 grad: 0.1296 (0.1288) loss: 0.8162 (0.8100) time: 0.1487 data: 0.0697 max mem: 9377 +Train: [74] [5700/6250] eta: 0:01:22 lr: 0.000020 grad: 0.1272 (0.1288) loss: 0.8087 (0.8100) time: 0.1518 data: 0.0709 max mem: 9377 +Train: [74] [5800/6250] eta: 0:01:07 lr: 0.000020 grad: 0.1231 (0.1288) loss: 0.8113 (0.8100) time: 0.1437 data: 0.0579 max mem: 9377 +Train: [74] [5900/6250] eta: 0:00:52 lr: 0.000020 grad: 0.1277 (0.1289) loss: 0.8038 (0.8100) time: 0.1432 data: 0.0671 max mem: 9377 +Train: [74] [6000/6250] eta: 0:00:37 lr: 0.000020 grad: 0.1264 (0.1289) loss: 0.8082 (0.8099) time: 0.1638 data: 0.0824 max mem: 9377 +Train: [74] [6100/6250] eta: 0:00:22 lr: 0.000020 grad: 0.1250 (0.1288) loss: 0.8131 (0.8099) time: 0.1543 data: 0.0753 max mem: 9377 +Train: [74] [6200/6250] eta: 0:00:07 lr: 0.000020 grad: 0.1301 (0.1288) loss: 0.8042 (0.8099) time: 0.1611 data: 0.0760 max mem: 9377 +Train: [74] [6249/6250] eta: 0:00:00 lr: 0.000020 grad: 0.1277 (0.1288) loss: 0.8094 (0.8099) time: 0.1286 data: 0.0475 max mem: 9377 +Train: [74] Total time: 0:15:42 (0.1508 s / it) +Averaged stats: lr: 0.000020 grad: 0.1277 (0.1288) loss: 0.8094 (0.8099) +Eval (hcp-train-subset): [74] [ 0/62] eta: 0:07:20 loss: 0.8210 (0.8210) time: 7.1018 data: 7.0697 max mem: 9377 +Eval (hcp-train-subset): [74] [61/62] eta: 0:00:00 loss: 0.8149 (0.8174) time: 0.1048 data: 0.0777 max mem: 9377 +Eval (hcp-train-subset): [74] Total time: 0:00:15 (0.2563 s / it) +Averaged stats (hcp-train-subset): loss: 0.8149 (0.8174) +Making plots (hcp-train-subset): example=56 +Eval (hcp-val): [74] [ 0/62] eta: 0:05:33 loss: 0.8307 (0.8307) time: 5.3838 data: 5.3518 max mem: 9377 +Eval (hcp-val): [74] [61/62] eta: 0:00:00 loss: 0.8300 (0.8318) time: 0.1477 data: 0.1208 max mem: 9377 +Eval (hcp-val): [74] Total time: 0:00:14 (0.2364 s / it) +Averaged stats (hcp-val): loss: 0.8300 (0.8318) +Making plots (hcp-val): example=61 +Eval (nsd-val): [74] [ 0/62] eta: 0:06:53 loss: 0.8019 (0.8019) time: 6.6615 data: 6.6301 max mem: 9377 +Eval (nsd-val): [74] [61/62] eta: 0:00:00 loss: 0.8084 (0.8116) time: 0.1183 data: 0.0932 max mem: 9377 +Eval (nsd-val): [74] Total time: 0:00:14 (0.2407 s / it) +Averaged stats (nsd-val): loss: 0.8084 (0.8116) +Making plots (nsd-val): example=37 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-00074.pth +Train: [75] [ 0/6250] eta: 10:37:24 lr: 0.000020 grad: 0.4310 (0.4310) loss: 0.7863 (0.7863) time: 6.1191 data: 5.9796 max mem: 9377 +Train: [75] [ 100/6250] eta: 0:22:21 lr: 0.000020 grad: 0.1367 (0.1610) loss: 0.8194 (0.8217) time: 0.1772 data: 0.0846 max mem: 9377 +Train: [75] [ 200/6250] eta: 0:18:58 lr: 0.000020 grad: 0.1316 (0.1537) loss: 0.8149 (0.8165) time: 0.1399 data: 0.0452 max mem: 9377 +Train: [75] [ 300/6250] eta: 0:17:06 lr: 0.000020 grad: 0.1341 (0.1490) loss: 0.8205 (0.8136) time: 0.1257 data: 0.0383 max mem: 9377 +Train: [75] [ 400/6250] eta: 0:16:09 lr: 0.000020 grad: 0.1291 (0.1441) loss: 0.8160 (0.8133) time: 0.1445 data: 0.0482 max mem: 9377 +Train: [75] [ 500/6250] eta: 0:15:20 lr: 0.000020 grad: 0.1186 (0.1405) loss: 0.8157 (0.8129) time: 0.1392 data: 0.0545 max mem: 9377 +Train: [75] [ 600/6250] eta: 0:15:02 lr: 0.000020 grad: 0.1210 (0.1375) loss: 0.8197 (0.8135) time: 0.1757 data: 0.0946 max mem: 9377 +Train: [75] [ 700/6250] eta: 0:14:38 lr: 0.000020 grad: 0.1182 (0.1352) loss: 0.8169 (0.8138) time: 0.1554 data: 0.0718 max mem: 9377 +Train: [75] [ 800/6250] eta: 0:14:19 lr: 0.000020 grad: 0.1221 (0.1339) loss: 0.8127 (0.8142) time: 0.1541 data: 0.0612 max mem: 9377 +Train: [75] [ 900/6250] eta: 0:13:58 lr: 0.000020 grad: 0.1225 (0.1326) loss: 0.8214 (0.8146) time: 0.1674 data: 0.0891 max mem: 9377 +Train: [75] [1000/6250] eta: 0:13:37 lr: 0.000020 grad: 0.1177 (0.1319) loss: 0.8159 (0.8147) time: 0.1632 data: 0.0801 max mem: 9377 +Train: [75] [1100/6250] eta: 0:13:16 lr: 0.000020 grad: 0.1199 (0.1312) loss: 0.8197 (0.8148) time: 0.1463 data: 0.0626 max mem: 9377 +Train: [75] [1200/6250] eta: 0:12:58 lr: 0.000020 grad: 0.1277 (0.1306) loss: 0.8121 (0.8146) time: 0.1892 data: 0.1071 max mem: 9377 +Train: [75] [1300/6250] eta: 0:12:37 lr: 0.000020 grad: 0.1315 (0.1303) loss: 0.8130 (0.8145) time: 0.1335 data: 0.0487 max mem: 9377 +Train: [75] [1400/6250] eta: 0:12:16 lr: 0.000020 grad: 0.1194 (0.1302) loss: 0.8140 (0.8143) time: 0.1423 data: 0.0555 max mem: 9377 +Train: [75] [1500/6250] eta: 0:11:57 lr: 0.000020 grad: 0.1220 (0.1300) loss: 0.8087 (0.8142) time: 0.1492 data: 0.0665 max mem: 9377 +Train: [75] [1600/6250] eta: 0:11:39 lr: 0.000020 grad: 0.1278 (0.1300) loss: 0.8072 (0.8138) time: 0.1379 data: 0.0580 max mem: 9377 +Train: [75] [1700/6250] eta: 0:11:22 lr: 0.000020 grad: 0.1227 (0.1297) loss: 0.8176 (0.8137) time: 0.1513 data: 0.0645 max mem: 9377 +Train: [75] [1800/6250] eta: 0:11:05 lr: 0.000020 grad: 0.1269 (0.1294) loss: 0.8073 (0.8137) time: 0.1611 data: 0.0829 max mem: 9377 +Train: [75] [1900/6250] eta: 0:10:52 lr: 0.000020 grad: 0.1157 (0.1292) loss: 0.8144 (0.8136) time: 0.2044 data: 0.1305 max mem: 9377 +Train: [75] [2000/6250] eta: 0:10:34 lr: 0.000020 grad: 0.1159 (0.1288) loss: 0.8139 (0.8135) time: 0.1498 data: 0.0651 max mem: 9377 +Train: [75] [2100/6250] eta: 0:10:19 lr: 0.000020 grad: 0.1241 (0.1286) loss: 0.8084 (0.8135) time: 0.1509 data: 0.0660 max mem: 9377 +Train: [75] [2200/6250] eta: 0:10:03 lr: 0.000020 grad: 0.1231 (0.1285) loss: 0.8104 (0.8133) time: 0.1323 data: 0.0481 max mem: 9377 +Train: [75] [2300/6250] eta: 0:09:47 lr: 0.000020 grad: 0.1262 (0.1284) loss: 0.8097 (0.8132) time: 0.1630 data: 0.0848 max mem: 9377 +Train: [75] [2400/6250] eta: 0:09:31 lr: 0.000020 grad: 0.1207 (0.1282) loss: 0.8179 (0.8131) time: 0.1354 data: 0.0527 max mem: 9377 +Train: [75] [2500/6250] eta: 0:09:16 lr: 0.000020 grad: 0.1225 (0.1281) loss: 0.8083 (0.8130) time: 0.1502 data: 0.0706 max mem: 9377 +Train: [75] [2600/6250] eta: 0:09:04 lr: 0.000020 grad: 0.1300 (0.1281) loss: 0.8079 (0.8128) time: 0.1462 data: 0.0610 max mem: 9377 +Train: [75] [2700/6250] eta: 0:08:48 lr: 0.000020 grad: 0.1253 (0.1282) loss: 0.8081 (0.8125) time: 0.1392 data: 0.0524 max mem: 9377 +Train: [75] [2800/6250] eta: 0:08:34 lr: 0.000019 grad: 0.1345 (0.1283) loss: 0.8038 (0.8124) time: 0.1461 data: 0.0652 max mem: 9377 +Train: [75] [2900/6250] eta: 0:08:19 lr: 0.000019 grad: 0.1242 (0.1284) loss: 0.8109 (0.8123) time: 0.1497 data: 0.0664 max mem: 9377 +Train: [75] [3000/6250] eta: 0:08:04 lr: 0.000019 grad: 0.1323 (0.1283) loss: 0.8052 (0.8122) time: 0.1506 data: 0.0748 max mem: 9377 +Train: [75] [3100/6250] eta: 0:07:49 lr: 0.000019 grad: 0.1247 (0.1283) loss: 0.8112 (0.8121) time: 0.1468 data: 0.0633 max mem: 9377 +Train: [75] [3200/6250] eta: 0:07:33 lr: 0.000019 grad: 0.1202 (0.1283) loss: 0.8110 (0.8120) time: 0.1430 data: 0.0583 max mem: 9377 +Train: [75] [3300/6250] eta: 0:07:18 lr: 0.000019 grad: 0.1283 (0.1283) loss: 0.8076 (0.8118) time: 0.1614 data: 0.0844 max mem: 9377 +Train: [75] [3400/6250] eta: 0:07:02 lr: 0.000019 grad: 0.1305 (0.1283) loss: 0.8084 (0.8117) time: 0.1379 data: 0.0483 max mem: 9377 +Train: [75] [3500/6250] eta: 0:06:47 lr: 0.000019 grad: 0.1229 (0.1283) loss: 0.8090 (0.8116) time: 0.1917 data: 0.1061 max mem: 9377 +Train: [75] [3600/6250] eta: 0:06:31 lr: 0.000019 grad: 0.1247 (0.1285) loss: 0.8075 (0.8115) time: 0.1301 data: 0.0410 max mem: 9377 +Train: [75] [3700/6250] eta: 0:06:15 lr: 0.000019 grad: 0.1351 (0.1286) loss: 0.7973 (0.8113) time: 0.1494 data: 0.0643 max mem: 9377 +Train: [75] [3800/6250] eta: 0:05:59 lr: 0.000019 grad: 0.1347 (0.1287) loss: 0.8019 (0.8111) time: 0.1253 data: 0.0386 max mem: 9377 +Train: [75] [3900/6250] eta: 0:05:44 lr: 0.000019 grad: 0.1274 (0.1287) loss: 0.8051 (0.8110) time: 0.1297 data: 0.0394 max mem: 9377 +Train: [75] [4000/6250] eta: 0:05:30 lr: 0.000019 grad: 0.1324 (0.1288) loss: 0.8012 (0.8109) time: 0.1662 data: 0.0861 max mem: 9377 +Train: [75] [4100/6250] eta: 0:05:15 lr: 0.000019 grad: 0.1255 (0.1288) loss: 0.8051 (0.8108) time: 0.1500 data: 0.0685 max mem: 9377 +Train: [75] [4200/6250] eta: 0:05:00 lr: 0.000019 grad: 0.1322 (0.1289) loss: 0.8110 (0.8107) time: 0.1458 data: 0.0621 max mem: 9377 +Train: [75] [4300/6250] eta: 0:04:45 lr: 0.000019 grad: 0.1254 (0.1289) loss: 0.8045 (0.8106) time: 0.1382 data: 0.0543 max mem: 9377 +Train: [75] [4400/6250] eta: 0:04:31 lr: 0.000019 grad: 0.1174 (0.1289) loss: 0.8196 (0.8106) time: 0.1559 data: 0.0730 max mem: 9377 +Train: [75] [4500/6250] eta: 0:04:16 lr: 0.000019 grad: 0.1250 (0.1289) loss: 0.8120 (0.8105) time: 0.1361 data: 0.0517 max mem: 9377 +Train: [75] [4600/6250] eta: 0:04:01 lr: 0.000019 grad: 0.1282 (0.1291) loss: 0.8056 (0.8105) time: 0.1261 data: 0.0449 max mem: 9377 +Train: [75] [4700/6250] eta: 0:03:47 lr: 0.000019 grad: 0.1239 (0.1291) loss: 0.8159 (0.8104) time: 0.1321 data: 0.0505 max mem: 9377 +Train: [75] [4800/6250] eta: 0:03:33 lr: 0.000019 grad: 0.1301 (0.1291) loss: 0.8073 (0.8104) time: 0.1877 data: 0.1077 max mem: 9377 +Train: [75] [4900/6250] eta: 0:03:18 lr: 0.000019 grad: 0.1245 (0.1292) loss: 0.8056 (0.8103) time: 0.1634 data: 0.0820 max mem: 9377 +Train: [75] [5000/6250] eta: 0:03:03 lr: 0.000019 grad: 0.1284 (0.1292) loss: 0.8091 (0.8103) time: 0.1550 data: 0.0782 max mem: 9377 +Train: [75] [5100/6250] eta: 0:02:49 lr: 0.000019 grad: 0.1298 (0.1293) loss: 0.8039 (0.8102) time: 0.1502 data: 0.0674 max mem: 9377 +Train: [75] [5200/6250] eta: 0:02:34 lr: 0.000019 grad: 0.1263 (0.1293) loss: 0.8157 (0.8101) time: 0.1339 data: 0.0590 max mem: 9377 +Train: [75] [5300/6250] eta: 0:02:19 lr: 0.000019 grad: 0.1284 (0.1294) loss: 0.8115 (0.8100) time: 0.1719 data: 0.0906 max mem: 9377 +Train: [75] [5400/6250] eta: 0:02:04 lr: 0.000019 grad: 0.1288 (0.1294) loss: 0.8001 (0.8100) time: 0.1031 data: 0.0168 max mem: 9377 +Train: [75] [5500/6250] eta: 0:01:50 lr: 0.000019 grad: 0.1216 (0.1294) loss: 0.8102 (0.8099) time: 0.1559 data: 0.0772 max mem: 9377 +Train: [75] [5600/6250] eta: 0:01:35 lr: 0.000019 grad: 0.1311 (0.1294) loss: 0.8094 (0.8099) time: 0.1279 data: 0.0416 max mem: 9377 +Train: [75] [5700/6250] eta: 0:01:20 lr: 0.000019 grad: 0.1284 (0.1295) loss: 0.7980 (0.8099) time: 0.1485 data: 0.0694 max mem: 9377 +Train: [75] [5800/6250] eta: 0:01:05 lr: 0.000019 grad: 0.1250 (0.1296) loss: 0.8090 (0.8098) time: 0.1217 data: 0.0344 max mem: 9377 +Train: [75] [5900/6250] eta: 0:00:51 lr: 0.000019 grad: 0.1352 (0.1297) loss: 0.8113 (0.8097) time: 0.1651 data: 0.0816 max mem: 9377 +Train: [75] [6000/6250] eta: 0:00:36 lr: 0.000019 grad: 0.1316 (0.1298) loss: 0.8071 (0.8096) time: 0.1391 data: 0.0596 max mem: 9377 +Train: [75] [6100/6250] eta: 0:00:21 lr: 0.000019 grad: 0.1341 (0.1299) loss: 0.7993 (0.8096) time: 0.1410 data: 0.0582 max mem: 9377 +Train: [75] [6200/6250] eta: 0:00:07 lr: 0.000019 grad: 0.1274 (0.1300) loss: 0.8006 (0.8095) time: 0.1284 data: 0.0431 max mem: 9377 +Train: [75] [6249/6250] eta: 0:00:00 lr: 0.000019 grad: 0.1345 (0.1300) loss: 0.7985 (0.8094) time: 0.1741 data: 0.0886 max mem: 9377 +Train: [75] Total time: 0:15:22 (0.1475 s / it) +Averaged stats: lr: 0.000019 grad: 0.1345 (0.1300) loss: 0.7985 (0.8094) +Eval (hcp-train-subset): [75] [ 0/62] eta: 0:06:17 loss: 0.8255 (0.8255) time: 6.0955 data: 6.0608 max mem: 9377 +Eval (hcp-train-subset): [75] [61/62] eta: 0:00:00 loss: 0.8144 (0.8179) time: 0.1355 data: 0.1084 max mem: 9377 +Eval (hcp-train-subset): [75] Total time: 0:00:14 (0.2361 s / it) +Averaged stats (hcp-train-subset): loss: 0.8144 (0.8179) +Eval (hcp-val): [75] [ 0/62] eta: 0:04:51 loss: 0.8277 (0.8277) time: 4.7049 data: 4.6672 max mem: 9377 +Eval (hcp-val): [75] [61/62] eta: 0:00:00 loss: 0.8320 (0.8320) time: 0.1342 data: 0.1088 max mem: 9377 +Eval (hcp-val): [75] Total time: 0:00:13 (0.2246 s / it) +Averaged stats (hcp-val): loss: 0.8320 (0.8320) +Eval (nsd-val): [75] [ 0/62] eta: 0:03:58 loss: 0.8016 (0.8016) time: 3.8398 data: 3.7573 max mem: 9377 +Eval (nsd-val): [75] [61/62] eta: 0:00:00 loss: 0.8128 (0.8134) time: 0.1448 data: 0.1185 max mem: 9377 +Eval (nsd-val): [75] Total time: 0:00:14 (0.2276 s / it) +Averaged stats (nsd-val): loss: 0.8128 (0.8134) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +Train: [76] [ 0/6250] eta: 9:04:20 lr: 0.000019 grad: 0.0950 (0.0950) loss: 0.8692 (0.8692) time: 5.2256 data: 4.9729 max mem: 9377 +Train: [76] [ 100/6250] eta: 0:20:53 lr: 0.000019 grad: 0.1649 (0.1976) loss: 0.8014 (0.8146) time: 0.1401 data: 0.0411 max mem: 9377 +Train: [76] [ 200/6250] eta: 0:18:08 lr: 0.000019 grad: 0.1443 (0.1791) loss: 0.8095 (0.8118) time: 0.1544 data: 0.0476 max mem: 9377 +Train: [76] [ 300/6250] eta: 0:17:06 lr: 0.000019 grad: 0.1376 (0.1697) loss: 0.8072 (0.8098) time: 0.1730 data: 0.0768 max mem: 9377 +Train: [76] [ 400/6250] eta: 0:16:57 lr: 0.000019 grad: 0.1223 (0.1620) loss: 0.8024 (0.8083) time: 0.1503 data: 0.0543 max mem: 9377 +Train: [76] [ 500/6250] eta: 0:16:35 lr: 0.000019 grad: 0.1277 (0.1574) loss: 0.8102 (0.8080) time: 0.1784 data: 0.0903 max mem: 9377 +Train: [76] [ 600/6250] eta: 0:16:08 lr: 0.000019 grad: 0.1314 (0.1530) loss: 0.8176 (0.8087) time: 0.1586 data: 0.0678 max mem: 9377 +Train: [76] [ 700/6250] eta: 0:15:40 lr: 0.000019 grad: 0.1321 (0.1493) loss: 0.8134 (0.8093) time: 0.1846 data: 0.0983 max mem: 9377 +Train: [76] [ 800/6250] eta: 0:15:27 lr: 0.000018 grad: 0.1205 (0.1462) loss: 0.8175 (0.8098) time: 0.1887 data: 0.0955 max mem: 9377 +Train: [76] [ 900/6250] eta: 0:14:53 lr: 0.000018 grad: 0.1235 (0.1443) loss: 0.8102 (0.8100) time: 0.1286 data: 0.0403 max mem: 9377 +Train: [76] [1000/6250] eta: 0:14:24 lr: 0.000018 grad: 0.1274 (0.1431) loss: 0.8093 (0.8101) time: 0.1499 data: 0.0656 max mem: 9377 +Train: [76] [1100/6250] eta: 0:13:57 lr: 0.000018 grad: 0.1229 (0.1416) loss: 0.8133 (0.8104) time: 0.1558 data: 0.0733 max mem: 9377 +Train: [76] [1200/6250] eta: 0:13:41 lr: 0.000018 grad: 0.1295 (0.1403) loss: 0.8069 (0.8104) time: 0.1628 data: 0.0790 max mem: 9377 +Train: [76] [1300/6250] eta: 0:13:25 lr: 0.000018 grad: 0.1241 (0.1392) loss: 0.8120 (0.8105) time: 0.1642 data: 0.0848 max mem: 9377 +Train: [76] [1400/6250] eta: 0:13:10 lr: 0.000018 grad: 0.1250 (0.1386) loss: 0.8153 (0.8104) time: 0.1912 data: 0.1142 max mem: 9377 +Train: [76] [1500/6250] eta: 0:12:53 lr: 0.000018 grad: 0.1237 (0.1379) loss: 0.8010 (0.8102) time: 0.1526 data: 0.0705 max mem: 9377 +Train: [76] [1600/6250] eta: 0:12:31 lr: 0.000018 grad: 0.1264 (0.1372) loss: 0.8052 (0.8101) time: 0.1541 data: 0.0675 max mem: 9377 +Train: [76] [1700/6250] eta: 0:12:10 lr: 0.000018 grad: 0.1264 (0.1369) loss: 0.8020 (0.8098) time: 0.1437 data: 0.0581 max mem: 9377 +Train: [76] [1800/6250] eta: 0:11:51 lr: 0.000018 grad: 0.1257 (0.1363) loss: 0.8057 (0.8097) time: 0.1414 data: 0.0564 max mem: 9377 +Train: [76] [1900/6250] eta: 0:11:31 lr: 0.000018 grad: 0.1344 (0.1359) loss: 0.8012 (0.8095) time: 0.1365 data: 0.0513 max mem: 9377 +Train: [76] [2000/6250] eta: 0:11:12 lr: 0.000018 grad: 0.1234 (0.1356) loss: 0.8160 (0.8093) time: 0.1361 data: 0.0489 max mem: 9377 +Train: [76] [2100/6250] eta: 0:10:55 lr: 0.000018 grad: 0.1299 (0.1353) loss: 0.8083 (0.8093) time: 0.1550 data: 0.0700 max mem: 9377 +Train: [76] [2200/6250] eta: 0:10:36 lr: 0.000018 grad: 0.1224 (0.1351) loss: 0.8075 (0.8091) time: 0.1321 data: 0.0530 max mem: 9377 +Train: [76] [2300/6250] eta: 0:10:22 lr: 0.000018 grad: 0.1281 (0.1348) loss: 0.8068 (0.8092) time: 0.2233 data: 0.1487 max mem: 9377 +Train: [76] [2400/6250] eta: 0:10:07 lr: 0.000018 grad: 0.1305 (0.1347) loss: 0.8061 (0.8091) time: 0.1479 data: 0.0582 max mem: 9377 +Train: [76] [2500/6250] eta: 0:09:52 lr: 0.000018 grad: 0.1237 (0.1346) loss: 0.8113 (0.8091) time: 0.1772 data: 0.0946 max mem: 9377 +Train: [76] [2600/6250] eta: 0:09:37 lr: 0.000018 grad: 0.1256 (0.1344) loss: 0.8022 (0.8091) time: 0.1630 data: 0.0805 max mem: 9377 +Train: [76] [2700/6250] eta: 0:09:21 lr: 0.000018 grad: 0.1278 (0.1343) loss: 0.8112 (0.8090) time: 0.1499 data: 0.0692 max mem: 9377 +Train: [76] [2800/6250] eta: 0:09:03 lr: 0.000018 grad: 0.1193 (0.1340) loss: 0.8159 (0.8091) time: 0.1344 data: 0.0497 max mem: 9377 +Train: [76] [2900/6250] eta: 0:08:47 lr: 0.000018 grad: 0.1268 (0.1338) loss: 0.8062 (0.8091) time: 0.1390 data: 0.0518 max mem: 9377 +Train: [76] [3000/6250] eta: 0:08:29 lr: 0.000018 grad: 0.1207 (0.1337) loss: 0.8094 (0.8091) time: 0.1357 data: 0.0495 max mem: 9377 +Train: [76] [3100/6250] eta: 0:08:12 lr: 0.000018 grad: 0.1190 (0.1333) loss: 0.8174 (0.8092) time: 0.1335 data: 0.0530 max mem: 9377 +Train: [76] [3200/6250] eta: 0:07:56 lr: 0.000018 grad: 0.1277 (0.1331) loss: 0.8073 (0.8092) time: 0.1426 data: 0.0602 max mem: 9377 +Train: [76] [3300/6250] eta: 0:07:39 lr: 0.000018 grad: 0.1265 (0.1331) loss: 0.8064 (0.8093) time: 0.1623 data: 0.0874 max mem: 9377 +Train: [76] [3400/6250] eta: 0:07:24 lr: 0.000018 grad: 0.1240 (0.1330) loss: 0.8084 (0.8093) time: 0.1638 data: 0.0839 max mem: 9377 +Train: [76] [3500/6250] eta: 0:07:09 lr: 0.000018 grad: 0.1260 (0.1328) loss: 0.8083 (0.8093) time: 0.1789 data: 0.0965 max mem: 9377 +Train: [76] [3600/6250] eta: 0:06:54 lr: 0.000018 grad: 0.1291 (0.1329) loss: 0.8126 (0.8092) time: 0.1429 data: 0.0597 max mem: 9377 +Train: [76] [3700/6250] eta: 0:06:40 lr: 0.000018 grad: 0.1270 (0.1329) loss: 0.8074 (0.8092) time: 0.1536 data: 0.0696 max mem: 9377 +Train: [76] [3800/6250] eta: 0:06:25 lr: 0.000018 grad: 0.1243 (0.1328) loss: 0.8055 (0.8092) time: 0.1621 data: 0.0777 max mem: 9377 +Train: [76] [3900/6250] eta: 0:06:10 lr: 0.000018 grad: 0.1307 (0.1328) loss: 0.8031 (0.8091) time: 0.1890 data: 0.1133 max mem: 9377 +Train: [76] [4000/6250] eta: 0:05:54 lr: 0.000018 grad: 0.1344 (0.1329) loss: 0.8079 (0.8091) time: 0.1539 data: 0.0738 max mem: 9377 +Train: [76] [4100/6250] eta: 0:05:38 lr: 0.000018 grad: 0.1263 (0.1329) loss: 0.8098 (0.8090) time: 0.1507 data: 0.0666 max mem: 9377 +Train: [76] [4200/6250] eta: 0:05:22 lr: 0.000018 grad: 0.1220 (0.1328) loss: 0.8054 (0.8090) time: 0.1580 data: 0.0778 max mem: 9377 +Train: [76] [4300/6250] eta: 0:05:06 lr: 0.000018 grad: 0.1327 (0.1327) loss: 0.8065 (0.8091) time: 0.1515 data: 0.0553 max mem: 9377 +Train: [76] [4400/6250] eta: 0:04:50 lr: 0.000018 grad: 0.1320 (0.1327) loss: 0.8052 (0.8091) time: 0.1521 data: 0.0699 max mem: 9377 +Train: [76] [4500/6250] eta: 0:04:33 lr: 0.000018 grad: 0.1260 (0.1326) loss: 0.8095 (0.8091) time: 0.1362 data: 0.0542 max mem: 9377 +Train: [76] [4600/6250] eta: 0:04:17 lr: 0.000018 grad: 0.1234 (0.1326) loss: 0.8055 (0.8091) time: 0.1389 data: 0.0508 max mem: 9377 +Train: [76] [4700/6250] eta: 0:04:01 lr: 0.000018 grad: 0.1351 (0.1326) loss: 0.7994 (0.8091) time: 0.1425 data: 0.0589 max mem: 9377 +Train: [76] [4800/6250] eta: 0:03:45 lr: 0.000018 grad: 0.1307 (0.1326) loss: 0.8128 (0.8091) time: 0.1341 data: 0.0463 max mem: 9377 +Train: [76] [4900/6250] eta: 0:03:29 lr: 0.000018 grad: 0.1372 (0.1327) loss: 0.8118 (0.8090) time: 0.1500 data: 0.0681 max mem: 9377 +Train: [76] [5000/6250] eta: 0:03:13 lr: 0.000018 grad: 0.1405 (0.1328) loss: 0.8001 (0.8089) time: 0.1478 data: 0.0647 max mem: 9377 +Train: [76] [5100/6250] eta: 0:02:58 lr: 0.000017 grad: 0.1325 (0.1327) loss: 0.8032 (0.8089) time: 0.1621 data: 0.0819 max mem: 9377 +Train: [76] [5200/6250] eta: 0:02:42 lr: 0.000017 grad: 0.1222 (0.1327) loss: 0.8144 (0.8089) time: 0.1471 data: 0.0629 max mem: 9377 +Train: [76] [5300/6250] eta: 0:02:27 lr: 0.000017 grad: 0.1325 (0.1327) loss: 0.7967 (0.8088) time: 0.1648 data: 0.0820 max mem: 9377 +Train: [76] [5400/6250] eta: 0:02:11 lr: 0.000017 grad: 0.1237 (0.1328) loss: 0.8063 (0.8087) time: 0.1324 data: 0.0477 max mem: 9377 +Train: [76] [5500/6250] eta: 0:01:55 lr: 0.000017 grad: 0.1367 (0.1328) loss: 0.8023 (0.8086) time: 0.1153 data: 0.0347 max mem: 9377 +Train: [76] [5600/6250] eta: 0:01:40 lr: 0.000017 grad: 0.1267 (0.1329) loss: 0.8021 (0.8086) time: 0.1211 data: 0.0364 max mem: 9377 +Train: [76] [5700/6250] eta: 0:01:24 lr: 0.000017 grad: 0.1298 (0.1329) loss: 0.8081 (0.8085) time: 0.1449 data: 0.0619 max mem: 9377 +Train: [76] [5800/6250] eta: 0:01:09 lr: 0.000017 grad: 0.1251 (0.1328) loss: 0.8129 (0.8084) time: 0.1381 data: 0.0603 max mem: 9377 +Train: [76] [5900/6250] eta: 0:00:53 lr: 0.000017 grad: 0.1326 (0.1327) loss: 0.8033 (0.8084) time: 0.1363 data: 0.0505 max mem: 9377 +Train: [76] [6000/6250] eta: 0:00:38 lr: 0.000017 grad: 0.1308 (0.1327) loss: 0.8055 (0.8084) time: 0.2016 data: 0.1215 max mem: 9377 +Train: [76] [6100/6250] eta: 0:00:23 lr: 0.000017 grad: 0.1296 (0.1327) loss: 0.8030 (0.8085) time: 0.1508 data: 0.0662 max mem: 9377 +Train: [76] [6200/6250] eta: 0:00:07 lr: 0.000017 grad: 0.1247 (0.1327) loss: 0.8106 (0.8085) time: 0.1510 data: 0.0692 max mem: 9377 +Train: [76] [6249/6250] eta: 0:00:00 lr: 0.000017 grad: 0.1249 (0.1327) loss: 0.8076 (0.8085) time: 0.1382 data: 0.0532 max mem: 9377 +Train: [76] Total time: 0:16:06 (0.1546 s / it) +Averaged stats: lr: 0.000017 grad: 0.1249 (0.1327) loss: 0.8076 (0.8085) +Eval (hcp-train-subset): [76] [ 0/62] eta: 0:06:55 loss: 0.8244 (0.8244) time: 6.7088 data: 6.6768 max mem: 9377 +Eval (hcp-train-subset): [76] [61/62] eta: 0:00:00 loss: 0.8138 (0.8170) time: 0.1364 data: 0.1112 max mem: 9377 +Eval (hcp-train-subset): [76] Total time: 0:00:14 (0.2352 s / it) +Averaged stats (hcp-train-subset): loss: 0.8138 (0.8170) +Eval (hcp-val): [76] [ 0/62] eta: 0:06:16 loss: 0.8276 (0.8276) time: 6.0767 data: 6.0438 max mem: 9377 +Eval (hcp-val): [76] [61/62] eta: 0:00:00 loss: 0.8293 (0.8305) time: 0.1066 data: 0.0814 max mem: 9377 +Eval (hcp-val): [76] Total time: 0:00:13 (0.2225 s / it) +Averaged stats (hcp-val): loss: 0.8293 (0.8305) +Eval (nsd-val): [76] [ 0/62] eta: 0:05:10 loss: 0.7985 (0.7985) time: 5.0082 data: 4.9776 max mem: 9377 +Eval (nsd-val): [76] [61/62] eta: 0:00:00 loss: 0.8095 (0.8110) time: 0.1480 data: 0.1210 max mem: 9377 +Eval (nsd-val): [76] Total time: 0:00:13 (0.2223 s / it) +Averaged stats (nsd-val): loss: 0.8095 (0.8110) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +Train: [77] [ 0/6250] eta: 8:27:45 lr: 0.000017 grad: 0.1661 (0.1661) loss: 0.8476 (0.8476) time: 4.8745 data: 4.5769 max mem: 9377 +Train: [77] [ 100/6250] eta: 0:20:47 lr: 0.000017 grad: 0.1299 (0.1722) loss: 0.8197 (0.8206) time: 0.1537 data: 0.0578 max mem: 9377 +Train: [77] [ 200/6250] eta: 0:18:03 lr: 0.000017 grad: 0.1469 (0.1659) loss: 0.8050 (0.8116) time: 0.1575 data: 0.0607 max mem: 9377 +Train: [77] [ 300/6250] eta: 0:16:37 lr: 0.000017 grad: 0.1451 (0.1608) loss: 0.8036 (0.8097) time: 0.1415 data: 0.0598 max mem: 9377 +Train: [77] [ 400/6250] eta: 0:15:33 lr: 0.000017 grad: 0.1295 (0.1540) loss: 0.8088 (0.8106) time: 0.1269 data: 0.0437 max mem: 9377 +Train: [77] [ 500/6250] eta: 0:14:55 lr: 0.000017 grad: 0.1267 (0.1516) loss: 0.8150 (0.8096) time: 0.1472 data: 0.0557 max mem: 9377 +Train: [77] [ 600/6250] eta: 0:14:33 lr: 0.000017 grad: 0.1198 (0.1482) loss: 0.8161 (0.8095) time: 0.1747 data: 0.0923 max mem: 9377 +Train: [77] [ 700/6250] eta: 0:14:10 lr: 0.000017 grad: 0.1238 (0.1455) loss: 0.8159 (0.8098) time: 0.1270 data: 0.0368 max mem: 9377 +Train: [77] [ 800/6250] eta: 0:13:47 lr: 0.000017 grad: 0.1166 (0.1433) loss: 0.8222 (0.8102) time: 0.1515 data: 0.0654 max mem: 9377 +Train: [77] [ 900/6250] eta: 0:13:23 lr: 0.000017 grad: 0.1218 (0.1415) loss: 0.8234 (0.8105) time: 0.1355 data: 0.0431 max mem: 9377 +Train: [77] [1000/6250] eta: 0:12:59 lr: 0.000017 grad: 0.1302 (0.1403) loss: 0.8108 (0.8107) time: 0.1367 data: 0.0369 max mem: 9377 +Train: [77] [1100/6250] eta: 0:12:40 lr: 0.000017 grad: 0.1329 (0.1392) loss: 0.8095 (0.8107) time: 0.1485 data: 0.0599 max mem: 9377 +Train: [77] [1200/6250] eta: 0:12:21 lr: 0.000017 grad: 0.1288 (0.1385) loss: 0.8087 (0.8106) time: 0.1396 data: 0.0542 max mem: 9377 +Train: [77] [1300/6250] eta: 0:12:04 lr: 0.000017 grad: 0.1235 (0.1377) loss: 0.8128 (0.8108) time: 0.1449 data: 0.0525 max mem: 9377 +Train: [77] [1400/6250] eta: 0:11:46 lr: 0.000017 grad: 0.1221 (0.1370) loss: 0.8065 (0.8107) time: 0.1227 data: 0.0342 max mem: 9377 +Train: [77] [1500/6250] eta: 0:11:31 lr: 0.000017 grad: 0.1315 (0.1369) loss: 0.8071 (0.8103) time: 0.1392 data: 0.0607 max mem: 9377 +Train: [77] [1600/6250] eta: 0:11:16 lr: 0.000017 grad: 0.1356 (0.1368) loss: 0.8038 (0.8100) time: 0.1484 data: 0.0713 max mem: 9377 +Train: [77] [1700/6250] eta: 0:11:02 lr: 0.000017 grad: 0.1345 (0.1367) loss: 0.8052 (0.8098) time: 0.1528 data: 0.0764 max mem: 9377 +Train: [77] [1800/6250] eta: 0:10:50 lr: 0.000017 grad: 0.1239 (0.1364) loss: 0.8103 (0.8097) time: 0.1406 data: 0.0570 max mem: 9377 +Train: [77] [1900/6250] eta: 0:10:38 lr: 0.000017 grad: 0.1372 (0.1366) loss: 0.8049 (0.8095) time: 0.1616 data: 0.0849 max mem: 9377 +Train: [77] [2000/6250] eta: 0:10:29 lr: 0.000017 grad: 0.1271 (0.1365) loss: 0.8046 (0.8093) time: 0.1952 data: 0.1148 max mem: 9377 +Train: [77] [2100/6250] eta: 0:10:17 lr: 0.000017 grad: 0.1299 (0.1364) loss: 0.7988 (0.8090) time: 0.1624 data: 0.0810 max mem: 9377 +Train: [77] [2200/6250] eta: 0:10:02 lr: 0.000017 grad: 0.1355 (0.1362) loss: 0.8026 (0.8087) time: 0.1377 data: 0.0560 max mem: 9377 +Train: [77] [2300/6250] eta: 0:09:45 lr: 0.000017 grad: 0.1221 (0.1360) loss: 0.8061 (0.8086) time: 0.1370 data: 0.0502 max mem: 9377 +Train: [77] [2400/6250] eta: 0:09:30 lr: 0.000017 grad: 0.1273 (0.1357) loss: 0.8023 (0.8085) time: 0.1430 data: 0.0579 max mem: 9377 +Train: [77] [2500/6250] eta: 0:09:15 lr: 0.000017 grad: 0.1217 (0.1355) loss: 0.8099 (0.8085) time: 0.1290 data: 0.0443 max mem: 9377 +Train: [77] [2600/6250] eta: 0:08:59 lr: 0.000017 grad: 0.1324 (0.1354) loss: 0.8039 (0.8083) time: 0.1331 data: 0.0535 max mem: 9377 +Train: [77] [2700/6250] eta: 0:08:43 lr: 0.000017 grad: 0.1220 (0.1351) loss: 0.8029 (0.8083) time: 0.1417 data: 0.0606 max mem: 9377 +Train: [77] [2800/6250] eta: 0:08:30 lr: 0.000017 grad: 0.1227 (0.1348) loss: 0.8073 (0.8082) time: 0.1929 data: 0.0472 max mem: 9377 +Train: [77] [2900/6250] eta: 0:08:15 lr: 0.000017 grad: 0.1280 (0.1347) loss: 0.8093 (0.8081) time: 0.1653 data: 0.0770 max mem: 9377 +Train: [77] [3000/6250] eta: 0:08:00 lr: 0.000017 grad: 0.1313 (0.1346) loss: 0.8016 (0.8081) time: 0.1271 data: 0.0344 max mem: 9377 +Train: [77] [3100/6250] eta: 0:07:44 lr: 0.000017 grad: 0.1242 (0.1344) loss: 0.8065 (0.8081) time: 0.1355 data: 0.0376 max mem: 9377 +Train: [77] [3200/6250] eta: 0:07:29 lr: 0.000017 grad: 0.1416 (0.1343) loss: 0.7952 (0.8079) time: 0.1438 data: 0.0592 max mem: 9377 +Train: [77] [3300/6250] eta: 0:07:13 lr: 0.000016 grad: 0.1356 (0.1343) loss: 0.8133 (0.8079) time: 0.1377 data: 0.0535 max mem: 9377 +Train: [77] [3400/6250] eta: 0:06:58 lr: 0.000016 grad: 0.1233 (0.1342) loss: 0.8102 (0.8078) time: 0.1294 data: 0.0442 max mem: 9377 +Train: [77] [3500/6250] eta: 0:06:43 lr: 0.000016 grad: 0.1278 (0.1342) loss: 0.8025 (0.8077) time: 0.1547 data: 0.0736 max mem: 9377 +Train: [77] [3600/6250] eta: 0:06:28 lr: 0.000016 grad: 0.1291 (0.1342) loss: 0.8087 (0.8076) time: 0.1542 data: 0.0618 max mem: 9377 +Train: [77] [3700/6250] eta: 0:06:13 lr: 0.000016 grad: 0.1399 (0.1343) loss: 0.8007 (0.8074) time: 0.1357 data: 0.0550 max mem: 9377 +Train: [77] [3800/6250] eta: 0:05:58 lr: 0.000016 grad: 0.1261 (0.1343) loss: 0.7990 (0.8073) time: 0.1233 data: 0.0416 max mem: 9377 +Train: [77] [3900/6250] eta: 0:05:43 lr: 0.000016 grad: 0.1342 (0.1343) loss: 0.8034 (0.8071) time: 0.1450 data: 0.0537 max mem: 9377 +Train: [77] [4000/6250] eta: 0:05:28 lr: 0.000016 grad: 0.1372 (0.1343) loss: 0.8028 (0.8070) time: 0.1692 data: 0.0826 max mem: 9377 +Train: [77] [4100/6250] eta: 0:05:14 lr: 0.000016 grad: 0.1343 (0.1342) loss: 0.8002 (0.8068) time: 0.1346 data: 0.0477 max mem: 9377 +Train: [77] [4200/6250] eta: 0:04:59 lr: 0.000016 grad: 0.1375 (0.1343) loss: 0.8008 (0.8068) time: 0.1467 data: 0.0624 max mem: 9377 +Train: [77] [4300/6250] eta: 0:04:45 lr: 0.000016 grad: 0.1334 (0.1343) loss: 0.8009 (0.8067) time: 0.1471 data: 0.0567 max mem: 9377 +Train: [77] [4400/6250] eta: 0:04:31 lr: 0.000016 grad: 0.1354 (0.1344) loss: 0.7978 (0.8065) time: 0.1477 data: 0.0670 max mem: 9377 +Train: [77] [4500/6250] eta: 0:04:16 lr: 0.000016 grad: 0.1393 (0.1345) loss: 0.8019 (0.8064) time: 0.1663 data: 0.0909 max mem: 9377 +Train: [77] [4600/6250] eta: 0:04:02 lr: 0.000016 grad: 0.1302 (0.1346) loss: 0.8015 (0.8063) time: 0.1444 data: 0.0639 max mem: 9377 +Train: [77] [4700/6250] eta: 0:03:47 lr: 0.000016 grad: 0.1299 (0.1347) loss: 0.8132 (0.8062) time: 0.1341 data: 0.0446 max mem: 9377 +Train: [77] [4800/6250] eta: 0:03:32 lr: 0.000016 grad: 0.1304 (0.1347) loss: 0.8080 (0.8062) time: 0.1518 data: 0.0711 max mem: 9377 +Train: [77] [4900/6250] eta: 0:03:18 lr: 0.000016 grad: 0.1264 (0.1347) loss: 0.8130 (0.8062) time: 0.1654 data: 0.0829 max mem: 9377 +Train: [77] [5000/6250] eta: 0:03:03 lr: 0.000016 grad: 0.1281 (0.1347) loss: 0.8105 (0.8063) time: 0.1342 data: 0.0550 max mem: 9377 +Train: [77] [5100/6250] eta: 0:02:48 lr: 0.000016 grad: 0.1472 (0.1348) loss: 0.8063 (0.8063) time: 0.1215 data: 0.0339 max mem: 9377 +Train: [77] [5200/6250] eta: 0:02:33 lr: 0.000016 grad: 0.1460 (0.1349) loss: 0.8041 (0.8063) time: 0.1186 data: 0.0244 max mem: 9377 +Train: [77] [5300/6250] eta: 0:02:19 lr: 0.000016 grad: 0.1335 (0.1349) loss: 0.8056 (0.8063) time: 0.1435 data: 0.0603 max mem: 9377 +Train: [77] [5400/6250] eta: 0:02:04 lr: 0.000016 grad: 0.1318 (0.1349) loss: 0.8080 (0.8063) time: 0.1190 data: 0.0363 max mem: 9377 +Train: [77] [5500/6250] eta: 0:01:49 lr: 0.000016 grad: 0.1352 (0.1349) loss: 0.8117 (0.8063) time: 0.1493 data: 0.0644 max mem: 9377 +Train: [77] [5600/6250] eta: 0:01:34 lr: 0.000016 grad: 0.1273 (0.1349) loss: 0.8103 (0.8063) time: 0.1497 data: 0.0644 max mem: 9377 +Train: [77] [5700/6250] eta: 0:01:20 lr: 0.000016 grad: 0.1316 (0.1349) loss: 0.8009 (0.8063) time: 0.1577 data: 0.0779 max mem: 9377 +Train: [77] [5800/6250] eta: 0:01:05 lr: 0.000016 grad: 0.1363 (0.1349) loss: 0.8050 (0.8063) time: 0.1363 data: 0.0468 max mem: 9377 +Train: [77] [5900/6250] eta: 0:00:50 lr: 0.000016 grad: 0.1320 (0.1349) loss: 0.8108 (0.8064) time: 0.1118 data: 0.0221 max mem: 9377 +Train: [77] [6000/6250] eta: 0:00:36 lr: 0.000016 grad: 0.1233 (0.1349) loss: 0.8157 (0.8065) time: 0.1327 data: 0.0497 max mem: 9377 +Train: [77] [6100/6250] eta: 0:00:21 lr: 0.000016 grad: 0.1297 (0.1349) loss: 0.8006 (0.8065) time: 0.1370 data: 0.0519 max mem: 9377 +Train: [77] [6200/6250] eta: 0:00:07 lr: 0.000016 grad: 0.1306 (0.1348) loss: 0.8021 (0.8065) time: 0.1197 data: 0.0288 max mem: 9377 +Train: [77] [6249/6250] eta: 0:00:00 lr: 0.000016 grad: 0.1208 (0.1348) loss: 0.8101 (0.8065) time: 0.1477 data: 0.0614 max mem: 9377 +Train: [77] Total time: 0:15:11 (0.1459 s / it) +Averaged stats: lr: 0.000016 grad: 0.1208 (0.1348) loss: 0.8101 (0.8065) +Eval (hcp-train-subset): [77] [ 0/62] eta: 0:06:19 loss: 0.8252 (0.8252) time: 6.1216 data: 6.0906 max mem: 9377 +Eval (hcp-train-subset): [77] [61/62] eta: 0:00:00 loss: 0.8161 (0.8172) time: 0.1290 data: 0.1036 max mem: 9377 +Eval (hcp-train-subset): [77] Total time: 0:00:14 (0.2411 s / it) +Averaged stats (hcp-train-subset): loss: 0.8161 (0.8172) +Eval (hcp-val): [77] [ 0/62] eta: 0:06:16 loss: 0.8282 (0.8282) time: 6.0778 data: 6.0408 max mem: 9377 +Eval (hcp-val): [77] [61/62] eta: 0:00:00 loss: 0.8300 (0.8313) time: 0.1253 data: 0.1000 max mem: 9377 +Eval (hcp-val): [77] Total time: 0:00:14 (0.2335 s / it) +Averaged stats (hcp-val): loss: 0.8300 (0.8313) +Eval (nsd-val): [77] [ 0/62] eta: 0:03:56 loss: 0.8030 (0.8030) time: 3.8071 data: 3.7273 max mem: 9377 +Eval (nsd-val): [77] [61/62] eta: 0:00:00 loss: 0.8117 (0.8138) time: 0.2904 data: 0.2639 max mem: 9377 +Eval (nsd-val): [77] Total time: 0:00:17 (0.2899 s / it) +Averaged stats (nsd-val): loss: 0.8117 (0.8138) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +Train: [78] [ 0/6250] eta: 15:36:37 lr: 0.000016 grad: 0.3155 (0.3155) loss: 0.7876 (0.7876) time: 8.9916 data: 8.8872 max mem: 9377 +Train: [78] [ 100/6250] eta: 0:24:05 lr: 0.000016 grad: 0.1275 (0.1720) loss: 0.8089 (0.8177) time: 0.1532 data: 0.0547 max mem: 9377 +Train: [78] [ 200/6250] eta: 0:20:15 lr: 0.000016 grad: 0.1480 (0.1698) loss: 0.8081 (0.8117) time: 0.1752 data: 0.0768 max mem: 9377 +Train: [78] [ 300/6250] eta: 0:18:33 lr: 0.000016 grad: 0.1328 (0.1629) loss: 0.8135 (0.8098) time: 0.1778 data: 0.0965 max mem: 9377 +Train: [78] [ 400/6250] eta: 0:17:17 lr: 0.000016 grad: 0.1356 (0.1576) loss: 0.8133 (0.8094) time: 0.1273 data: 0.0350 max mem: 9377 +Train: [78] [ 500/6250] eta: 0:16:23 lr: 0.000016 grad: 0.1353 (0.1545) loss: 0.8012 (0.8090) time: 0.1226 data: 0.0351 max mem: 9377 +Train: [78] [ 600/6250] eta: 0:15:45 lr: 0.000016 grad: 0.1369 (0.1518) loss: 0.8121 (0.8091) time: 0.1633 data: 0.0761 max mem: 9377 +Train: [78] [ 700/6250] eta: 0:15:06 lr: 0.000016 grad: 0.1303 (0.1491) loss: 0.8079 (0.8093) time: 0.1503 data: 0.0490 max mem: 9377 +Train: [78] [ 800/6250] eta: 0:14:30 lr: 0.000016 grad: 0.1254 (0.1470) loss: 0.8171 (0.8098) time: 0.1151 data: 0.0109 max mem: 9377 +Train: [78] [ 900/6250] eta: 0:14:09 lr: 0.000016 grad: 0.1246 (0.1451) loss: 0.8156 (0.8103) time: 0.1642 data: 0.0707 max mem: 9377 +Train: [78] [1000/6250] eta: 0:13:49 lr: 0.000016 grad: 0.1262 (0.1439) loss: 0.8091 (0.8105) time: 0.1352 data: 0.0526 max mem: 9377 +Train: [78] [1100/6250] eta: 0:13:31 lr: 0.000016 grad: 0.1303 (0.1425) loss: 0.8059 (0.8106) time: 0.1630 data: 0.0729 max mem: 9377 +Train: [78] [1200/6250] eta: 0:13:13 lr: 0.000016 grad: 0.1308 (0.1416) loss: 0.8095 (0.8107) time: 0.1636 data: 0.0738 max mem: 9377 +Train: [78] [1300/6250] eta: 0:12:53 lr: 0.000016 grad: 0.1312 (0.1407) loss: 0.8084 (0.8107) time: 0.1318 data: 0.0446 max mem: 9377 +Train: [78] [1400/6250] eta: 0:12:36 lr: 0.000016 grad: 0.1330 (0.1401) loss: 0.8042 (0.8106) time: 0.1574 data: 0.0781 max mem: 9377 +Train: [78] [1500/6250] eta: 0:12:19 lr: 0.000015 grad: 0.1253 (0.1398) loss: 0.8105 (0.8104) time: 0.1682 data: 0.0834 max mem: 9377 +Train: [78] [1600/6250] eta: 0:12:01 lr: 0.000015 grad: 0.1305 (0.1393) loss: 0.8065 (0.8102) time: 0.1446 data: 0.0565 max mem: 9377 +Train: [78] [1700/6250] eta: 0:11:42 lr: 0.000015 grad: 0.1325 (0.1389) loss: 0.8106 (0.8101) time: 0.1219 data: 0.0277 max mem: 9377 +Train: [78] [1800/6250] eta: 0:11:25 lr: 0.000015 grad: 0.1305 (0.1385) loss: 0.8120 (0.8100) time: 0.1479 data: 0.0614 max mem: 9377 +Train: [78] [1900/6250] eta: 0:11:14 lr: 0.000015 grad: 0.1337 (0.1384) loss: 0.8064 (0.8098) time: 0.1715 data: 0.0781 max mem: 9377 +Train: [78] [2000/6250] eta: 0:10:59 lr: 0.000015 grad: 0.1290 (0.1380) loss: 0.8058 (0.8097) time: 0.1530 data: 0.0693 max mem: 9377 +Train: [78] [2100/6250] eta: 0:10:44 lr: 0.000015 grad: 0.1360 (0.1379) loss: 0.8047 (0.8096) time: 0.1538 data: 0.0699 max mem: 9377 +Train: [78] [2200/6250] eta: 0:10:28 lr: 0.000015 grad: 0.1289 (0.1377) loss: 0.8043 (0.8094) time: 0.1465 data: 0.0595 max mem: 9377 +Train: [78] [2300/6250] eta: 0:10:12 lr: 0.000015 grad: 0.1335 (0.1374) loss: 0.8054 (0.8093) time: 0.1573 data: 0.0741 max mem: 9377 +Train: [78] [2400/6250] eta: 0:09:54 lr: 0.000015 grad: 0.1313 (0.1371) loss: 0.8061 (0.8092) time: 0.1315 data: 0.0470 max mem: 9377 +Train: [78] [2500/6250] eta: 0:09:38 lr: 0.000015 grad: 0.1313 (0.1369) loss: 0.8037 (0.8091) time: 0.1523 data: 0.0663 max mem: 9377 +Train: [78] [2600/6250] eta: 0:09:20 lr: 0.000015 grad: 0.1327 (0.1368) loss: 0.8068 (0.8090) time: 0.1310 data: 0.0448 max mem: 9377 +Train: [78] [2700/6250] eta: 0:09:02 lr: 0.000015 grad: 0.1335 (0.1366) loss: 0.8053 (0.8089) time: 0.1303 data: 0.0401 max mem: 9377 +Train: [78] [2800/6250] eta: 0:08:46 lr: 0.000015 grad: 0.1383 (0.1364) loss: 0.8153 (0.8088) time: 0.1482 data: 0.0695 max mem: 9377 +Train: [78] [2900/6250] eta: 0:08:30 lr: 0.000015 grad: 0.1325 (0.1362) loss: 0.8069 (0.8087) time: 0.1553 data: 0.0720 max mem: 9377 +Train: [78] [3000/6250] eta: 0:08:14 lr: 0.000015 grad: 0.1322 (0.1361) loss: 0.8013 (0.8087) time: 0.1369 data: 0.0552 max mem: 9377 +Train: [78] [3100/6250] eta: 0:07:57 lr: 0.000015 grad: 0.1207 (0.1359) loss: 0.8163 (0.8087) time: 0.1335 data: 0.0517 max mem: 9377 +Train: [78] [3200/6250] eta: 0:07:41 lr: 0.000015 grad: 0.1262 (0.1356) loss: 0.8085 (0.8088) time: 0.1444 data: 0.0610 max mem: 9377 +Train: [78] [3300/6250] eta: 0:07:25 lr: 0.000015 grad: 0.1266 (0.1355) loss: 0.8120 (0.8088) time: 0.1499 data: 0.0626 max mem: 9377 +Train: [78] [3400/6250] eta: 0:07:09 lr: 0.000015 grad: 0.1198 (0.1354) loss: 0.8103 (0.8089) time: 0.1169 data: 0.0319 max mem: 9377 +Train: [78] [3500/6250] eta: 0:06:54 lr: 0.000015 grad: 0.1229 (0.1352) loss: 0.8088 (0.8089) time: 0.1519 data: 0.0717 max mem: 9377 +Train: [78] [3600/6250] eta: 0:06:38 lr: 0.000015 grad: 0.1351 (0.1351) loss: 0.8031 (0.8089) time: 0.1445 data: 0.0580 max mem: 9377 +Train: [78] [3700/6250] eta: 0:06:23 lr: 0.000015 grad: 0.1316 (0.1351) loss: 0.8115 (0.8089) time: 0.1510 data: 0.0729 max mem: 9377 +Train: [78] [3800/6250] eta: 0:06:08 lr: 0.000015 grad: 0.1309 (0.1350) loss: 0.8106 (0.8089) time: 0.1498 data: 0.0733 max mem: 9377 +Train: [78] [3900/6250] eta: 0:05:52 lr: 0.000015 grad: 0.1345 (0.1351) loss: 0.8051 (0.8088) time: 0.1549 data: 0.0728 max mem: 9377 +Train: [78] [4000/6250] eta: 0:05:37 lr: 0.000015 grad: 0.1408 (0.1352) loss: 0.8019 (0.8086) time: 0.1652 data: 0.0858 max mem: 9377 +Train: [78] [4100/6250] eta: 0:05:21 lr: 0.000015 grad: 0.1353 (0.1351) loss: 0.8086 (0.8085) time: 0.1495 data: 0.0638 max mem: 9377 +Train: [78] [4200/6250] eta: 0:05:06 lr: 0.000015 grad: 0.1351 (0.1352) loss: 0.7990 (0.8085) time: 0.1386 data: 0.0599 max mem: 9377 +Train: [78] [4300/6250] eta: 0:04:51 lr: 0.000015 grad: 0.1340 (0.1351) loss: 0.8048 (0.8083) time: 0.1898 data: 0.1137 max mem: 9377 +Train: [78] [4400/6250] eta: 0:04:35 lr: 0.000015 grad: 0.1232 (0.1351) loss: 0.8062 (0.8083) time: 0.1420 data: 0.0622 max mem: 9377 +Train: [78] [4500/6250] eta: 0:04:20 lr: 0.000015 grad: 0.1382 (0.1352) loss: 0.8063 (0.8082) time: 0.1464 data: 0.0626 max mem: 9377 +Train: [78] [4600/6250] eta: 0:04:05 lr: 0.000015 grad: 0.1249 (0.1351) loss: 0.8176 (0.8082) time: 0.1437 data: 0.0621 max mem: 9377 +Train: [78] [4700/6250] eta: 0:03:50 lr: 0.000015 grad: 0.1309 (0.1351) loss: 0.8043 (0.8081) time: 0.1580 data: 0.0771 max mem: 9377 +Train: [78] [4800/6250] eta: 0:03:35 lr: 0.000015 grad: 0.1338 (0.1351) loss: 0.8163 (0.8081) time: 0.1521 data: 0.0708 max mem: 9377 +Train: [78] [4900/6250] eta: 0:03:20 lr: 0.000015 grad: 0.1233 (0.1350) loss: 0.8068 (0.8081) time: 0.1525 data: 0.0765 max mem: 9377 +Train: [78] [5000/6250] eta: 0:03:06 lr: 0.000015 grad: 0.1247 (0.1349) loss: 0.8118 (0.8082) time: 0.1511 data: 0.0702 max mem: 9377 +Train: [78] [5100/6250] eta: 0:02:51 lr: 0.000015 grad: 0.1348 (0.1349) loss: 0.8108 (0.8082) time: 0.1393 data: 0.0535 max mem: 9377 +Train: [78] [5200/6250] eta: 0:02:36 lr: 0.000015 grad: 0.1277 (0.1349) loss: 0.8115 (0.8083) time: 0.1520 data: 0.0713 max mem: 9377 +Train: [78] [5300/6250] eta: 0:02:21 lr: 0.000015 grad: 0.1384 (0.1349) loss: 0.8134 (0.8083) time: 0.1439 data: 0.0592 max mem: 9377 +Train: [78] [5400/6250] eta: 0:02:06 lr: 0.000015 grad: 0.1323 (0.1349) loss: 0.8109 (0.8083) time: 0.1849 data: 0.1071 max mem: 9377 +Train: [78] [5500/6250] eta: 0:01:51 lr: 0.000015 grad: 0.1217 (0.1348) loss: 0.8141 (0.8083) time: 0.1544 data: 0.0715 max mem: 9377 +Train: [78] [5600/6250] eta: 0:01:37 lr: 0.000015 grad: 0.1246 (0.1348) loss: 0.8156 (0.8083) time: 0.1452 data: 0.0669 max mem: 9377 +Train: [78] [5700/6250] eta: 0:01:22 lr: 0.000015 grad: 0.1290 (0.1347) loss: 0.8117 (0.8083) time: 0.1635 data: 0.0827 max mem: 9377 +Train: [78] [5800/6250] eta: 0:01:07 lr: 0.000015 grad: 0.1347 (0.1347) loss: 0.8057 (0.8083) time: 0.1470 data: 0.0627 max mem: 9377 +Train: [78] [5900/6250] eta: 0:00:52 lr: 0.000015 grad: 0.1245 (0.1347) loss: 0.8143 (0.8083) time: 0.1368 data: 0.0485 max mem: 9377 +Train: [78] [6000/6250] eta: 0:00:37 lr: 0.000015 grad: 0.1260 (0.1347) loss: 0.8106 (0.8084) time: 0.1382 data: 0.0561 max mem: 9377 +Train: [78] [6100/6250] eta: 0:00:22 lr: 0.000015 grad: 0.1315 (0.1347) loss: 0.8031 (0.8084) time: 0.1599 data: 0.0824 max mem: 9377 +Train: [78] [6200/6250] eta: 0:00:07 lr: 0.000014 grad: 0.1312 (0.1346) loss: 0.8216 (0.8084) time: 0.1223 data: 0.0390 max mem: 9377 +Train: [78] [6249/6250] eta: 0:00:00 lr: 0.000014 grad: 0.1289 (0.1346) loss: 0.8176 (0.8085) time: 0.1774 data: 0.0784 max mem: 9377 +Train: [78] Total time: 0:15:36 (0.1499 s / it) +Averaged stats: lr: 0.000014 grad: 0.1289 (0.1346) loss: 0.8176 (0.8085) +Eval (hcp-train-subset): [78] [ 0/62] eta: 0:05:15 loss: 0.8234 (0.8234) time: 5.0898 data: 5.0593 max mem: 9377 +Eval (hcp-train-subset): [78] [61/62] eta: 0:00:00 loss: 0.8153 (0.8168) time: 0.1407 data: 0.1157 max mem: 9377 +Eval (hcp-train-subset): [78] Total time: 0:00:14 (0.2292 s / it) +Averaged stats (hcp-train-subset): loss: 0.8153 (0.8168) +Eval (hcp-val): [78] [ 0/62] eta: 0:03:32 loss: 0.8269 (0.8269) time: 3.4271 data: 3.3667 max mem: 9377 +Eval (hcp-val): [78] [61/62] eta: 0:00:00 loss: 0.8292 (0.8306) time: 0.1223 data: 0.0972 max mem: 9377 +Eval (hcp-val): [78] Total time: 0:00:13 (0.2232 s / it) +Averaged stats (hcp-val): loss: 0.8292 (0.8306) +Eval (nsd-val): [78] [ 0/62] eta: 0:03:30 loss: 0.7989 (0.7989) time: 3.4012 data: 3.3160 max mem: 9377 +Eval (nsd-val): [78] [61/62] eta: 0:00:00 loss: 0.8107 (0.8118) time: 0.1390 data: 0.1139 max mem: 9377 +Eval (nsd-val): [78] Total time: 0:00:13 (0.2204 s / it) +Averaged stats (nsd-val): loss: 0.8107 (0.8118) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +Train: [79] [ 0/6250] eta: 9:43:24 lr: 0.000014 grad: 0.0965 (0.0965) loss: 0.8730 (0.8730) time: 5.6007 data: 5.4624 max mem: 9377 +Train: [79] [ 100/6250] eta: 0:20:08 lr: 0.000014 grad: 0.1673 (0.1741) loss: 0.8188 (0.8238) time: 0.1522 data: 0.0725 max mem: 9377 +Train: [79] [ 200/6250] eta: 0:17:29 lr: 0.000014 grad: 0.1437 (0.1624) loss: 0.8161 (0.8182) time: 0.1570 data: 0.0599 max mem: 9377 +Train: [79] [ 300/6250] eta: 0:16:44 lr: 0.000014 grad: 0.1419 (0.1564) loss: 0.8001 (0.8153) time: 0.1800 data: 0.0811 max mem: 9377 +Train: [79] [ 400/6250] eta: 0:16:13 lr: 0.000014 grad: 0.1393 (0.1528) loss: 0.8080 (0.8133) time: 0.1622 data: 0.0631 max mem: 9377 +Train: [79] [ 500/6250] eta: 0:15:47 lr: 0.000014 grad: 0.1274 (0.1496) loss: 0.8103 (0.8125) time: 0.1544 data: 0.0502 max mem: 9377 +Train: [79] [ 600/6250] eta: 0:15:16 lr: 0.000014 grad: 0.1346 (0.1489) loss: 0.8100 (0.8113) time: 0.1358 data: 0.0529 max mem: 9377 +Train: [79] [ 700/6250] eta: 0:14:51 lr: 0.000014 grad: 0.1374 (0.1480) loss: 0.8108 (0.8108) time: 0.1401 data: 0.0561 max mem: 9377 +Train: [79] [ 800/6250] eta: 0:14:35 lr: 0.000014 grad: 0.1356 (0.1474) loss: 0.8060 (0.8104) time: 0.1643 data: 0.0738 max mem: 9377 +Train: [79] [ 900/6250] eta: 0:14:23 lr: 0.000014 grad: 0.1397 (0.1463) loss: 0.8065 (0.8101) time: 0.1974 data: 0.1053 max mem: 9377 +Train: [79] [1000/6250] eta: 0:14:14 lr: 0.000014 grad: 0.1336 (0.1457) loss: 0.8090 (0.8098) time: 0.1849 data: 0.1059 max mem: 9377 +Train: [79] [1100/6250] eta: 0:13:48 lr: 0.000014 grad: 0.1354 (0.1450) loss: 0.8080 (0.8097) time: 0.1502 data: 0.0631 max mem: 9377 +Train: [79] [1200/6250] eta: 0:13:28 lr: 0.000014 grad: 0.1380 (0.1446) loss: 0.8089 (0.8093) time: 0.1482 data: 0.0592 max mem: 9377 +Train: [79] [1300/6250] eta: 0:13:11 lr: 0.000014 grad: 0.1377 (0.1440) loss: 0.8044 (0.8091) time: 0.1446 data: 0.0591 max mem: 9377 +Train: [79] [1400/6250] eta: 0:12:50 lr: 0.000014 grad: 0.1387 (0.1438) loss: 0.8085 (0.8089) time: 0.1309 data: 0.0468 max mem: 9377 +Train: [79] [1500/6250] eta: 0:12:33 lr: 0.000014 grad: 0.1434 (0.1437) loss: 0.7998 (0.8085) time: 0.1422 data: 0.0633 max mem: 9377 +Train: [79] [1600/6250] eta: 0:12:18 lr: 0.000014 grad: 0.1310 (0.1437) loss: 0.8055 (0.8082) time: 0.1889 data: 0.1029 max mem: 9377 +Train: [79] [1700/6250] eta: 0:12:01 lr: 0.000014 grad: 0.1329 (0.1435) loss: 0.8044 (0.8079) time: 0.1578 data: 0.0752 max mem: 9377 +Train: [79] [1800/6250] eta: 0:11:43 lr: 0.000014 grad: 0.1282 (0.1432) loss: 0.8104 (0.8078) time: 0.1558 data: 0.0676 max mem: 9377 +Train: [79] [1900/6250] eta: 0:11:27 lr: 0.000014 grad: 0.1395 (0.1428) loss: 0.8000 (0.8076) time: 0.1586 data: 0.0708 max mem: 9377 +Train: [79] [2000/6250] eta: 0:11:11 lr: 0.000014 grad: 0.1477 (0.1428) loss: 0.8012 (0.8074) time: 0.1519 data: 0.0658 max mem: 9377 +Train: [79] [2100/6250] eta: 0:10:55 lr: 0.000014 grad: 0.1392 (0.1428) loss: 0.8080 (0.8071) time: 0.1214 data: 0.0315 max mem: 9377 +Train: [79] [2200/6250] eta: 0:10:40 lr: 0.000014 grad: 0.1394 (0.1425) loss: 0.8050 (0.8070) time: 0.1743 data: 0.0950 max mem: 9377 +Train: [79] [2300/6250] eta: 0:10:24 lr: 0.000014 grad: 0.1308 (0.1423) loss: 0.8030 (0.8069) time: 0.1812 data: 0.0957 max mem: 9377 +Train: [79] [2400/6250] eta: 0:10:05 lr: 0.000014 grad: 0.1294 (0.1419) loss: 0.8030 (0.8069) time: 0.1292 data: 0.0368 max mem: 9377 +Train: [79] [2500/6250] eta: 0:09:48 lr: 0.000014 grad: 0.1345 (0.1416) loss: 0.7981 (0.8068) time: 0.1403 data: 0.0586 max mem: 9377 +Train: [79] [2600/6250] eta: 0:09:30 lr: 0.000014 grad: 0.1343 (0.1412) loss: 0.7985 (0.8069) time: 0.1433 data: 0.0514 max mem: 9377 +Train: [79] [2700/6250] eta: 0:09:13 lr: 0.000014 grad: 0.1365 (0.1411) loss: 0.7987 (0.8069) time: 0.1510 data: 0.0700 max mem: 9377 +Train: [79] [2800/6250] eta: 0:08:57 lr: 0.000014 grad: 0.1266 (0.1408) loss: 0.8087 (0.8069) time: 0.1577 data: 0.0758 max mem: 9377 +Train: [79] [2900/6250] eta: 0:08:42 lr: 0.000014 grad: 0.1319 (0.1406) loss: 0.7995 (0.8069) time: 0.1596 data: 0.0776 max mem: 9377 +Train: [79] [3000/6250] eta: 0:08:27 lr: 0.000014 grad: 0.1392 (0.1404) loss: 0.8078 (0.8068) time: 0.1822 data: 0.0988 max mem: 9377 +Train: [79] [3100/6250] eta: 0:08:10 lr: 0.000014 grad: 0.1297 (0.1403) loss: 0.8057 (0.8069) time: 0.1302 data: 0.0446 max mem: 9377 +Train: [79] [3200/6250] eta: 0:07:55 lr: 0.000014 grad: 0.1329 (0.1403) loss: 0.8064 (0.8068) time: 0.1393 data: 0.0580 max mem: 9377 +Train: [79] [3300/6250] eta: 0:07:40 lr: 0.000014 grad: 0.1389 (0.1403) loss: 0.8047 (0.8068) time: 0.1593 data: 0.0714 max mem: 9377 +Train: [79] [3400/6250] eta: 0:07:24 lr: 0.000014 grad: 0.1437 (0.1402) loss: 0.8029 (0.8067) time: 0.1580 data: 0.0746 max mem: 9377 +Train: [79] [3500/6250] eta: 0:07:08 lr: 0.000014 grad: 0.1331 (0.1400) loss: 0.8069 (0.8067) time: 0.1521 data: 0.0710 max mem: 9377 +Train: [79] [3600/6250] eta: 0:06:52 lr: 0.000014 grad: 0.1235 (0.1399) loss: 0.8078 (0.8067) time: 0.1500 data: 0.0625 max mem: 9377 +Train: [79] [3700/6250] eta: 0:06:36 lr: 0.000014 grad: 0.1297 (0.1399) loss: 0.8034 (0.8067) time: 0.1552 data: 0.0719 max mem: 9377 +Train: [79] [3800/6250] eta: 0:06:20 lr: 0.000014 grad: 0.1358 (0.1398) loss: 0.8010 (0.8066) time: 0.1105 data: 0.0190 max mem: 9377 +Train: [79] [3900/6250] eta: 0:06:03 lr: 0.000014 grad: 0.1328 (0.1397) loss: 0.8107 (0.8066) time: 0.1379 data: 0.0555 max mem: 9377 +Train: [79] [4000/6250] eta: 0:05:47 lr: 0.000014 grad: 0.1408 (0.1397) loss: 0.8000 (0.8066) time: 0.1362 data: 0.0534 max mem: 9377 +Train: [79] [4100/6250] eta: 0:05:31 lr: 0.000014 grad: 0.1368 (0.1397) loss: 0.8016 (0.8064) time: 0.1493 data: 0.0599 max mem: 9377 +Train: [79] [4200/6250] eta: 0:05:15 lr: 0.000014 grad: 0.1374 (0.1397) loss: 0.8012 (0.8063) time: 0.1826 data: 0.0986 max mem: 9377 +Train: [79] [4300/6250] eta: 0:04:59 lr: 0.000014 grad: 0.1390 (0.1397) loss: 0.7973 (0.8063) time: 0.1512 data: 0.0637 max mem: 9377 +Train: [79] [4400/6250] eta: 0:04:43 lr: 0.000014 grad: 0.1362 (0.1397) loss: 0.8043 (0.8063) time: 0.1288 data: 0.0484 max mem: 9377 +Train: [79] [4500/6250] eta: 0:04:28 lr: 0.000014 grad: 0.1461 (0.1398) loss: 0.8003 (0.8062) time: 0.1286 data: 0.0442 max mem: 9377 +Train: [79] [4600/6250] eta: 0:04:12 lr: 0.000014 grad: 0.1404 (0.1398) loss: 0.8038 (0.8062) time: 0.1470 data: 0.0625 max mem: 9377 +Train: [79] [4700/6250] eta: 0:03:56 lr: 0.000013 grad: 0.1361 (0.1398) loss: 0.8077 (0.8062) time: 0.1722 data: 0.0924 max mem: 9377 +Train: [79] [4800/6250] eta: 0:03:40 lr: 0.000013 grad: 0.1361 (0.1397) loss: 0.8113 (0.8062) time: 0.1477 data: 0.0644 max mem: 9377 +Train: [79] [4900/6250] eta: 0:03:25 lr: 0.000013 grad: 0.1353 (0.1397) loss: 0.8043 (0.8062) time: 0.1261 data: 0.0440 max mem: 9377 +Train: [79] [5000/6250] eta: 0:03:09 lr: 0.000013 grad: 0.1322 (0.1397) loss: 0.8057 (0.8062) time: 0.1570 data: 0.0766 max mem: 9377 +Train: [79] [5100/6250] eta: 0:02:54 lr: 0.000013 grad: 0.1337 (0.1398) loss: 0.8080 (0.8062) time: 0.1375 data: 0.0461 max mem: 9377 +Train: [79] [5200/6250] eta: 0:02:38 lr: 0.000013 grad: 0.1472 (0.1397) loss: 0.8022 (0.8062) time: 0.1435 data: 0.0557 max mem: 9377 +Train: [79] [5300/6250] eta: 0:02:23 lr: 0.000013 grad: 0.1313 (0.1397) loss: 0.8012 (0.8062) time: 0.1724 data: 0.0891 max mem: 9377 +Train: [79] [5400/6250] eta: 0:02:08 lr: 0.000013 grad: 0.1389 (0.1397) loss: 0.8035 (0.8062) time: 0.1618 data: 0.0830 max mem: 9377 +Train: [79] [5500/6250] eta: 0:01:52 lr: 0.000013 grad: 0.1265 (0.1396) loss: 0.8136 (0.8063) time: 0.1368 data: 0.0517 max mem: 9377 +Train: [79] [5600/6250] eta: 0:01:37 lr: 0.000013 grad: 0.1293 (0.1396) loss: 0.8081 (0.8062) time: 0.1391 data: 0.0597 max mem: 9377 +Train: [79] [5700/6250] eta: 0:01:22 lr: 0.000013 grad: 0.1288 (0.1395) loss: 0.8236 (0.8063) time: 0.1288 data: 0.0424 max mem: 9377 +Train: [79] [5800/6250] eta: 0:01:07 lr: 0.000013 grad: 0.1284 (0.1394) loss: 0.8146 (0.8063) time: 0.1518 data: 0.0677 max mem: 9377 +Train: [79] [5900/6250] eta: 0:00:52 lr: 0.000013 grad: 0.1326 (0.1394) loss: 0.8087 (0.8063) time: 0.1661 data: 0.0884 max mem: 9377 +Train: [79] [6000/6250] eta: 0:00:37 lr: 0.000013 grad: 0.1273 (0.1393) loss: 0.8109 (0.8064) time: 0.1751 data: 0.0966 max mem: 9377 +Train: [79] [6100/6250] eta: 0:00:22 lr: 0.000013 grad: 0.1354 (0.1393) loss: 0.8040 (0.8064) time: 0.1399 data: 0.0622 max mem: 9377 +Train: [79] [6200/6250] eta: 0:00:07 lr: 0.000013 grad: 0.1266 (0.1393) loss: 0.8107 (0.8064) time: 0.1833 data: 0.0893 max mem: 9377 +Train: [79] [6249/6250] eta: 0:00:00 lr: 0.000013 grad: 0.1293 (0.1392) loss: 0.8055 (0.8064) time: 0.1625 data: 0.0745 max mem: 9377 +Train: [79] Total time: 0:15:48 (0.1517 s / it) +Averaged stats: lr: 0.000013 grad: 0.1293 (0.1392) loss: 0.8055 (0.8064) +Eval (hcp-train-subset): [79] [ 0/62] eta: 0:04:55 loss: 0.8223 (0.8223) time: 4.7664 data: 4.7358 max mem: 9377 +Eval (hcp-train-subset): [79] [61/62] eta: 0:00:00 loss: 0.8142 (0.8156) time: 0.1229 data: 0.0976 max mem: 9377 +Eval (hcp-train-subset): [79] Total time: 0:00:13 (0.2195 s / it) +Averaged stats (hcp-train-subset): loss: 0.8142 (0.8156) +Making plots (hcp-train-subset): example=38 +Eval (hcp-val): [79] [ 0/62] eta: 0:03:59 loss: 0.8267 (0.8267) time: 3.8703 data: 3.8170 max mem: 9377 +Eval (hcp-val): [79] [61/62] eta: 0:00:00 loss: 0.8282 (0.8306) time: 0.1198 data: 0.0947 max mem: 9377 +Eval (hcp-val): [79] Total time: 0:00:13 (0.2192 s / it) +Averaged stats (hcp-val): loss: 0.8282 (0.8306) +Making plots (hcp-val): example=8 +Eval (nsd-val): [79] [ 0/62] eta: 0:05:30 loss: 0.7994 (0.7994) time: 5.3383 data: 5.3044 max mem: 9377 +Eval (nsd-val): [79] [61/62] eta: 0:00:00 loss: 0.8082 (0.8112) time: 0.1199 data: 0.0950 max mem: 9377 +Eval (nsd-val): [79] Total time: 0:00:13 (0.2121 s / it) +Averaged stats (nsd-val): loss: 0.8082 (0.8112) +Making plots (nsd-val): example=10 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-00079.pth +Train: [80] [ 0/6250] eta: 10:09:32 lr: 0.000013 grad: 0.0844 (0.0844) loss: 0.8666 (0.8666) time: 5.8517 data: 5.7109 max mem: 9377 +Train: [80] [ 100/6250] eta: 0:20:32 lr: 0.000013 grad: 0.1556 (0.1979) loss: 0.8106 (0.8112) time: 0.1681 data: 0.0770 max mem: 9377 +Train: [80] [ 200/6250] eta: 0:17:40 lr: 0.000013 grad: 0.1567 (0.1781) loss: 0.7983 (0.8095) time: 0.1476 data: 0.0466 max mem: 9377 +Train: [80] [ 300/6250] eta: 0:16:21 lr: 0.000013 grad: 0.1375 (0.1720) loss: 0.8000 (0.8064) time: 0.1490 data: 0.0452 max mem: 9377 +Train: [80] [ 400/6250] eta: 0:15:23 lr: 0.000013 grad: 0.1461 (0.1678) loss: 0.8058 (0.8051) time: 0.1343 data: 0.0273 max mem: 9377 +Train: [80] [ 500/6250] eta: 0:14:47 lr: 0.000013 grad: 0.1270 (0.1642) loss: 0.8144 (0.8049) time: 0.1527 data: 0.0661 max mem: 9377 +Train: [80] [ 600/6250] eta: 0:14:18 lr: 0.000013 grad: 0.1384 (0.1607) loss: 0.8041 (0.8048) time: 0.1532 data: 0.0608 max mem: 9377 +Train: [80] [ 700/6250] eta: 0:13:56 lr: 0.000013 grad: 0.1364 (0.1584) loss: 0.8138 (0.8051) time: 0.1486 data: 0.0563 max mem: 9377 +Train: [80] [ 800/6250] eta: 0:13:33 lr: 0.000013 grad: 0.1375 (0.1562) loss: 0.8050 (0.8051) time: 0.1283 data: 0.0344 max mem: 9377 +Train: [80] [ 900/6250] eta: 0:13:15 lr: 0.000013 grad: 0.1325 (0.1545) loss: 0.8058 (0.8052) time: 0.1182 data: 0.0300 max mem: 9377 +Train: [80] [1000/6250] eta: 0:12:56 lr: 0.000013 grad: 0.1399 (0.1529) loss: 0.8098 (0.8054) time: 0.1459 data: 0.0621 max mem: 9377 +Train: [80] [1100/6250] eta: 0:12:36 lr: 0.000013 grad: 0.1378 (0.1515) loss: 0.8049 (0.8057) time: 0.1537 data: 0.0692 max mem: 9377 +Train: [80] [1200/6250] eta: 0:12:19 lr: 0.000013 grad: 0.1404 (0.1506) loss: 0.8073 (0.8054) time: 0.1587 data: 0.0744 max mem: 9377 +Train: [80] [1300/6250] eta: 0:12:06 lr: 0.000013 grad: 0.1222 (0.1496) loss: 0.8115 (0.8054) time: 0.1794 data: 0.0951 max mem: 9377 +Train: [80] [1400/6250] eta: 0:11:56 lr: 0.000013 grad: 0.1477 (0.1490) loss: 0.7929 (0.8052) time: 0.1435 data: 0.0648 max mem: 9377 +Train: [80] [1500/6250] eta: 0:11:39 lr: 0.000013 grad: 0.1369 (0.1485) loss: 0.8022 (0.8052) time: 0.1346 data: 0.0564 max mem: 9377 +Train: [80] [1600/6250] eta: 0:11:24 lr: 0.000013 grad: 0.1335 (0.1479) loss: 0.8056 (0.8052) time: 0.1304 data: 0.0477 max mem: 9377 +Train: [80] [1700/6250] eta: 0:11:08 lr: 0.000013 grad: 0.1265 (0.1472) loss: 0.8082 (0.8053) time: 0.1354 data: 0.0506 max mem: 9377 +Train: [80] [1800/6250] eta: 0:10:53 lr: 0.000013 grad: 0.1369 (0.1468) loss: 0.8118 (0.8053) time: 0.1264 data: 0.0437 max mem: 9377 +Train: [80] [1900/6250] eta: 0:10:38 lr: 0.000013 grad: 0.1345 (0.1462) loss: 0.8104 (0.8055) time: 0.1496 data: 0.0684 max mem: 9377 +Train: [80] [2000/6250] eta: 0:10:21 lr: 0.000013 grad: 0.1310 (0.1457) loss: 0.8066 (0.8056) time: 0.1431 data: 0.0607 max mem: 9377 +Train: [80] [2100/6250] eta: 0:10:05 lr: 0.000013 grad: 0.1300 (0.1453) loss: 0.8112 (0.8058) time: 0.1253 data: 0.0238 max mem: 9377 +Train: [80] [2200/6250] eta: 0:09:49 lr: 0.000013 grad: 0.1426 (0.1450) loss: 0.8066 (0.8058) time: 0.1341 data: 0.0447 max mem: 9377 +Train: [80] [2300/6250] eta: 0:09:33 lr: 0.000013 grad: 0.1315 (0.1446) loss: 0.8106 (0.8059) time: 0.1403 data: 0.0570 max mem: 9377 +Train: [80] [2400/6250] eta: 0:09:16 lr: 0.000013 grad: 0.1315 (0.1442) loss: 0.8123 (0.8061) time: 0.1392 data: 0.0586 max mem: 9377 +Train: [80] [2500/6250] eta: 0:08:59 lr: 0.000013 grad: 0.1342 (0.1438) loss: 0.8091 (0.8062) time: 0.1190 data: 0.0372 max mem: 9377 +Train: [80] [2600/6250] eta: 0:08:44 lr: 0.000013 grad: 0.1356 (0.1434) loss: 0.8124 (0.8064) time: 0.1545 data: 0.0734 max mem: 9377 +Train: [80] [2700/6250] eta: 0:08:28 lr: 0.000013 grad: 0.1278 (0.1430) loss: 0.8141 (0.8066) time: 0.1424 data: 0.0591 max mem: 9377 +Train: [80] [2800/6250] eta: 0:08:13 lr: 0.000013 grad: 0.1273 (0.1428) loss: 0.8053 (0.8067) time: 0.1266 data: 0.0409 max mem: 9377 +Train: [80] [2900/6250] eta: 0:07:58 lr: 0.000013 grad: 0.1325 (0.1426) loss: 0.8117 (0.8069) time: 0.1357 data: 0.0541 max mem: 9377 +Train: [80] [3000/6250] eta: 0:07:43 lr: 0.000013 grad: 0.1352 (0.1424) loss: 0.8123 (0.8070) time: 0.1351 data: 0.0477 max mem: 9377 +Train: [80] [3100/6250] eta: 0:07:29 lr: 0.000013 grad: 0.1287 (0.1422) loss: 0.8111 (0.8071) time: 0.1394 data: 0.0534 max mem: 9377 +Train: [80] [3200/6250] eta: 0:07:14 lr: 0.000013 grad: 0.1260 (0.1420) loss: 0.8066 (0.8072) time: 0.1329 data: 0.0524 max mem: 9377 +Train: [80] [3300/6250] eta: 0:07:00 lr: 0.000013 grad: 0.1304 (0.1418) loss: 0.8197 (0.8074) time: 0.1409 data: 0.0564 max mem: 9377 +Train: [80] [3400/6250] eta: 0:06:45 lr: 0.000012 grad: 0.1272 (0.1417) loss: 0.8142 (0.8074) time: 0.1213 data: 0.0394 max mem: 9377 +Train: [80] [3500/6250] eta: 0:06:31 lr: 0.000012 grad: 0.1207 (0.1414) loss: 0.8089 (0.8075) time: 0.1466 data: 0.0703 max mem: 9377 +Train: [80] [3600/6250] eta: 0:06:17 lr: 0.000012 grad: 0.1255 (0.1412) loss: 0.8063 (0.8076) time: 0.1604 data: 0.0780 max mem: 9377 +Train: [80] [3700/6250] eta: 0:06:02 lr: 0.000012 grad: 0.1286 (0.1410) loss: 0.8025 (0.8077) time: 0.1222 data: 0.0378 max mem: 9377 +Train: [80] [3800/6250] eta: 0:05:48 lr: 0.000012 grad: 0.1264 (0.1408) loss: 0.8061 (0.8078) time: 0.1025 data: 0.0142 max mem: 9377 +Train: [80] [3900/6250] eta: 0:05:34 lr: 0.000012 grad: 0.1267 (0.1405) loss: 0.8144 (0.8079) time: 0.1353 data: 0.0519 max mem: 9377 +Train: [80] [4000/6250] eta: 0:05:20 lr: 0.000012 grad: 0.1394 (0.1404) loss: 0.8125 (0.8079) time: 0.1348 data: 0.0522 max mem: 9377 +Train: [80] [4100/6250] eta: 0:05:06 lr: 0.000012 grad: 0.1329 (0.1402) loss: 0.8098 (0.8080) time: 0.1418 data: 0.0586 max mem: 9377 +Train: [80] [4200/6250] eta: 0:04:52 lr: 0.000012 grad: 0.1270 (0.1400) loss: 0.8147 (0.8080) time: 0.1221 data: 0.0467 max mem: 9377 +Train: [80] [4300/6250] eta: 0:04:38 lr: 0.000012 grad: 0.1267 (0.1398) loss: 0.8096 (0.8081) time: 0.1462 data: 0.0644 max mem: 9377 +Train: [80] [4400/6250] eta: 0:04:23 lr: 0.000012 grad: 0.1323 (0.1397) loss: 0.8109 (0.8081) time: 0.1500 data: 0.0728 max mem: 9377 +Train: [80] [4500/6250] eta: 0:04:09 lr: 0.000012 grad: 0.1339 (0.1396) loss: 0.8075 (0.8081) time: 0.1356 data: 0.0595 max mem: 9377 +Train: [80] [4600/6250] eta: 0:03:55 lr: 0.000012 grad: 0.1307 (0.1394) loss: 0.8080 (0.8082) time: 0.1277 data: 0.0412 max mem: 9377 +Train: [80] [4700/6250] eta: 0:03:40 lr: 0.000012 grad: 0.1294 (0.1394) loss: 0.8059 (0.8082) time: 0.1366 data: 0.0542 max mem: 9377 +Train: [80] [4800/6250] eta: 0:03:26 lr: 0.000012 grad: 0.1332 (0.1392) loss: 0.8124 (0.8082) time: 0.1360 data: 0.0552 max mem: 9377 +Train: [80] [4900/6250] eta: 0:03:12 lr: 0.000012 grad: 0.1329 (0.1391) loss: 0.8098 (0.8083) time: 0.1430 data: 0.0630 max mem: 9377 +Train: [80] [5000/6250] eta: 0:02:57 lr: 0.000012 grad: 0.1328 (0.1390) loss: 0.8109 (0.8084) time: 0.1140 data: 0.0310 max mem: 9377 +Train: [80] [5100/6250] eta: 0:02:43 lr: 0.000012 grad: 0.1334 (0.1389) loss: 0.8150 (0.8085) time: 0.1651 data: 0.0812 max mem: 9377 +Train: [80] [5200/6250] eta: 0:02:29 lr: 0.000012 grad: 0.1310 (0.1389) loss: 0.8108 (0.8085) time: 0.1557 data: 0.0808 max mem: 9377 +Train: [80] [5300/6250] eta: 0:02:15 lr: 0.000012 grad: 0.1309 (0.1388) loss: 0.8117 (0.8085) time: 0.1584 data: 0.0743 max mem: 9377 +Train: [80] [5400/6250] eta: 0:02:01 lr: 0.000012 grad: 0.1284 (0.1387) loss: 0.8146 (0.8086) time: 0.1467 data: 0.0628 max mem: 9377 +Train: [80] [5500/6250] eta: 0:01:47 lr: 0.000012 grad: 0.1340 (0.1387) loss: 0.8112 (0.8086) time: 0.1765 data: 0.0941 max mem: 9377 +Train: [80] [5600/6250] eta: 0:01:32 lr: 0.000012 grad: 0.1347 (0.1386) loss: 0.8117 (0.8086) time: 0.1637 data: 0.0835 max mem: 9377 +Train: [80] [5700/6250] eta: 0:01:18 lr: 0.000012 grad: 0.1298 (0.1385) loss: 0.8132 (0.8087) time: 0.1519 data: 0.0665 max mem: 9377 +Train: [80] [5800/6250] eta: 0:01:04 lr: 0.000012 grad: 0.1343 (0.1384) loss: 0.8132 (0.8087) time: 0.1521 data: 0.0589 max mem: 9377 +Train: [80] [5900/6250] eta: 0:00:50 lr: 0.000012 grad: 0.1296 (0.1384) loss: 0.8073 (0.8087) time: 0.1686 data: 0.0825 max mem: 9377 +Train: [80] [6000/6250] eta: 0:00:35 lr: 0.000012 grad: 0.1355 (0.1383) loss: 0.8099 (0.8088) time: 0.1596 data: 0.0856 max mem: 9377 +Train: [80] [6100/6250] eta: 0:00:21 lr: 0.000012 grad: 0.1357 (0.1383) loss: 0.8033 (0.8088) time: 0.1479 data: 0.0635 max mem: 9377 +Train: [80] [6200/6250] eta: 0:00:07 lr: 0.000012 grad: 0.1303 (0.1382) loss: 0.8087 (0.8089) time: 0.1536 data: 0.0682 max mem: 9377 +Train: [80] [6249/6250] eta: 0:00:00 lr: 0.000012 grad: 0.1322 (0.1382) loss: 0.8121 (0.8089) time: 0.1486 data: 0.0614 max mem: 9377 +Train: [80] Total time: 0:15:06 (0.1450 s / it) +Averaged stats: lr: 0.000012 grad: 0.1322 (0.1382) loss: 0.8121 (0.8089) +Eval (hcp-train-subset): [80] [ 0/62] eta: 0:06:14 loss: 0.8228 (0.8228) time: 6.0337 data: 6.0026 max mem: 9377 +Eval (hcp-train-subset): [80] [61/62] eta: 0:00:00 loss: 0.8141 (0.8161) time: 0.1069 data: 0.0812 max mem: 9377 +Eval (hcp-train-subset): [80] Total time: 0:00:13 (0.2155 s / it) +Averaged stats (hcp-train-subset): loss: 0.8141 (0.8161) +Eval (hcp-val): [80] [ 0/62] eta: 0:03:37 loss: 0.8292 (0.8292) time: 3.5012 data: 3.4387 max mem: 9377 +Eval (hcp-val): [80] [61/62] eta: 0:00:00 loss: 0.8277 (0.8305) time: 0.1159 data: 0.0909 max mem: 9377 +Eval (hcp-val): [80] Total time: 0:00:12 (0.2072 s / it) +Averaged stats (hcp-val): loss: 0.8277 (0.8305) +Eval (nsd-val): [80] [ 0/62] eta: 0:04:45 loss: 0.7998 (0.7998) time: 4.6000 data: 4.5529 max mem: 9377 +Eval (nsd-val): [80] [61/62] eta: 0:00:00 loss: 0.8068 (0.8094) time: 0.1195 data: 0.0943 max mem: 9377 +Eval (nsd-val): [80] Total time: 0:00:12 (0.2061 s / it) +Averaged stats (nsd-val): loss: 0.8068 (0.8094) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +Train: [81] [ 0/6250] eta: 7:21:23 lr: 0.000012 grad: 0.1239 (0.1239) loss: 0.8482 (0.8482) time: 4.2374 data: 4.0050 max mem: 9377 +Train: [81] [ 100/6250] eta: 0:19:57 lr: 0.000012 grad: 0.1368 (0.1751) loss: 0.8159 (0.8086) time: 0.1485 data: 0.0378 max mem: 9377 +Train: [81] [ 200/6250] eta: 0:17:12 lr: 0.000012 grad: 0.1340 (0.1604) loss: 0.8117 (0.8100) time: 0.1421 data: 0.0417 max mem: 9377 +Train: [81] [ 300/6250] eta: 0:15:47 lr: 0.000012 grad: 0.1544 (0.1602) loss: 0.8070 (0.8081) time: 0.1303 data: 0.0337 max mem: 9377 +Train: [81] [ 400/6250] eta: 0:14:58 lr: 0.000012 grad: 0.1456 (0.1583) loss: 0.8001 (0.8068) time: 0.1410 data: 0.0537 max mem: 9377 +Train: [81] [ 500/6250] eta: 0:14:25 lr: 0.000012 grad: 0.1304 (0.1545) loss: 0.8184 (0.8072) time: 0.1061 data: 0.0003 max mem: 9377 +Train: [81] [ 600/6250] eta: 0:13:48 lr: 0.000012 grad: 0.1323 (0.1518) loss: 0.8105 (0.8077) time: 0.1377 data: 0.0386 max mem: 9377 +Train: [81] [ 700/6250] eta: 0:13:22 lr: 0.000012 grad: 0.1363 (0.1497) loss: 0.8139 (0.8083) time: 0.1410 data: 0.0469 max mem: 9377 +Train: [81] [ 800/6250] eta: 0:13:19 lr: 0.000012 grad: 0.1283 (0.1476) loss: 0.8165 (0.8090) time: 0.1961 data: 0.1089 max mem: 9377 +Train: [81] [ 900/6250] eta: 0:13:12 lr: 0.000012 grad: 0.1304 (0.1456) loss: 0.8121 (0.8095) time: 0.1584 data: 0.0675 max mem: 9377 +Train: [81] [1000/6250] eta: 0:13:12 lr: 0.000012 grad: 0.1269 (0.1444) loss: 0.8140 (0.8098) time: 0.1899 data: 0.1054 max mem: 9377 +Train: [81] [1100/6250] eta: 0:13:07 lr: 0.000012 grad: 0.1272 (0.1434) loss: 0.8123 (0.8100) time: 0.1741 data: 0.0953 max mem: 9377 +Train: [81] [1200/6250] eta: 0:12:59 lr: 0.000012 grad: 0.1252 (0.1426) loss: 0.8144 (0.8100) time: 0.1968 data: 0.1049 max mem: 9377 +Train: [81] [1300/6250] eta: 0:12:41 lr: 0.000012 grad: 0.1316 (0.1416) loss: 0.8110 (0.8101) time: 0.1290 data: 0.0429 max mem: 9377 +Train: [81] [1400/6250] eta: 0:12:25 lr: 0.000012 grad: 0.1246 (0.1412) loss: 0.8078 (0.8099) time: 0.1538 data: 0.0694 max mem: 9377 +Train: [81] [1500/6250] eta: 0:12:10 lr: 0.000012 grad: 0.1333 (0.1409) loss: 0.8128 (0.8098) time: 0.1422 data: 0.0569 max mem: 9377 +Train: [81] [1600/6250] eta: 0:11:56 lr: 0.000012 grad: 0.1345 (0.1406) loss: 0.8044 (0.8095) time: 0.1582 data: 0.0788 max mem: 9377 +Train: [81] [1700/6250] eta: 0:11:43 lr: 0.000012 grad: 0.1250 (0.1403) loss: 0.8113 (0.8094) time: 0.1613 data: 0.0710 max mem: 9377 +Train: [81] [1800/6250] eta: 0:11:26 lr: 0.000012 grad: 0.1311 (0.1400) loss: 0.8103 (0.8093) time: 0.1371 data: 0.0432 max mem: 9377 +Train: [81] [1900/6250] eta: 0:11:09 lr: 0.000012 grad: 0.1391 (0.1402) loss: 0.8077 (0.8091) time: 0.1449 data: 0.0601 max mem: 9377 +Train: [81] [2000/6250] eta: 0:10:50 lr: 0.000012 grad: 0.1244 (0.1400) loss: 0.8073 (0.8090) time: 0.1497 data: 0.0725 max mem: 9377 +Train: [81] [2100/6250] eta: 0:10:33 lr: 0.000012 grad: 0.1383 (0.1400) loss: 0.8146 (0.8088) time: 0.1585 data: 0.0761 max mem: 9377 +Train: [81] [2200/6250] eta: 0:10:20 lr: 0.000012 grad: 0.1311 (0.1399) loss: 0.8146 (0.8088) time: 0.1873 data: 0.1103 max mem: 9377 +Train: [81] [2300/6250] eta: 0:10:07 lr: 0.000011 grad: 0.1279 (0.1397) loss: 0.8084 (0.8087) time: 0.1754 data: 0.0945 max mem: 9377 +Train: [81] [2400/6250] eta: 0:09:54 lr: 0.000011 grad: 0.1279 (0.1394) loss: 0.8135 (0.8088) time: 0.1680 data: 0.0805 max mem: 9377 +Train: [81] [2500/6250] eta: 0:09:42 lr: 0.000011 grad: 0.1267 (0.1393) loss: 0.8159 (0.8089) time: 0.1824 data: 0.1100 max mem: 9377 +Train: [81] [2600/6250] eta: 0:09:28 lr: 0.000011 grad: 0.1256 (0.1390) loss: 0.8147 (0.8091) time: 0.1723 data: 0.0971 max mem: 9377 +Train: [81] [2700/6250] eta: 0:09:16 lr: 0.000011 grad: 0.1282 (0.1387) loss: 0.8154 (0.8091) time: 0.1732 data: 0.0912 max mem: 9377 +Train: [81] [2800/6250] eta: 0:09:03 lr: 0.000011 grad: 0.1306 (0.1385) loss: 0.8100 (0.8092) time: 0.1755 data: 0.1000 max mem: 9377 +Train: [81] [2900/6250] eta: 0:08:48 lr: 0.000011 grad: 0.1287 (0.1383) loss: 0.8150 (0.8092) time: 0.1471 data: 0.0635 max mem: 9377 +Train: [81] [3000/6250] eta: 0:08:33 lr: 0.000011 grad: 0.1392 (0.1380) loss: 0.8092 (0.8093) time: 0.1624 data: 0.0802 max mem: 9377 +Train: [81] [3100/6250] eta: 0:08:17 lr: 0.000011 grad: 0.1299 (0.1379) loss: 0.8176 (0.8094) time: 0.1409 data: 0.0532 max mem: 9377 +Train: [81] [3200/6250] eta: 0:08:00 lr: 0.000011 grad: 0.1342 (0.1379) loss: 0.8146 (0.8094) time: 0.1325 data: 0.0465 max mem: 9377 +Train: [81] [3300/6250] eta: 0:07:43 lr: 0.000011 grad: 0.1402 (0.1379) loss: 0.8029 (0.8094) time: 0.1169 data: 0.0247 max mem: 9377 +Train: [81] [3400/6250] eta: 0:07:25 lr: 0.000011 grad: 0.1287 (0.1378) loss: 0.8084 (0.8094) time: 0.1383 data: 0.0489 max mem: 9377 +Train: [81] [3500/6250] eta: 0:07:09 lr: 0.000011 grad: 0.1356 (0.1376) loss: 0.8109 (0.8095) time: 0.1338 data: 0.0427 max mem: 9377 +Train: [81] [3600/6250] eta: 0:06:52 lr: 0.000011 grad: 0.1410 (0.1376) loss: 0.7998 (0.8095) time: 0.1489 data: 0.0604 max mem: 9377 +Train: [81] [3700/6250] eta: 0:06:35 lr: 0.000011 grad: 0.1324 (0.1376) loss: 0.8149 (0.8095) time: 0.1386 data: 0.0587 max mem: 9377 +Train: [81] [3800/6250] eta: 0:06:19 lr: 0.000011 grad: 0.1272 (0.1375) loss: 0.8142 (0.8095) time: 0.1407 data: 0.0618 max mem: 9377 +Train: [81] [3900/6250] eta: 0:06:03 lr: 0.000011 grad: 0.1344 (0.1375) loss: 0.8114 (0.8095) time: 0.1379 data: 0.0562 max mem: 9377 +Train: [81] [4000/6250] eta: 0:05:47 lr: 0.000011 grad: 0.1408 (0.1375) loss: 0.8051 (0.8094) time: 0.1671 data: 0.0866 max mem: 9377 +Train: [81] [4100/6250] eta: 0:05:31 lr: 0.000011 grad: 0.1221 (0.1375) loss: 0.8124 (0.8094) time: 0.1378 data: 0.0528 max mem: 9377 +Train: [81] [4200/6250] eta: 0:05:15 lr: 0.000011 grad: 0.1380 (0.1375) loss: 0.8034 (0.8093) time: 0.1064 data: 0.0215 max mem: 9377 +Train: [81] [4300/6250] eta: 0:04:59 lr: 0.000011 grad: 0.1420 (0.1375) loss: 0.8057 (0.8093) time: 0.1426 data: 0.0587 max mem: 9377 +Train: [81] [4400/6250] eta: 0:04:43 lr: 0.000011 grad: 0.1400 (0.1376) loss: 0.8059 (0.8092) time: 0.1841 data: 0.1018 max mem: 9377 +Train: [81] [4500/6250] eta: 0:04:28 lr: 0.000011 grad: 0.1400 (0.1376) loss: 0.8111 (0.8092) time: 0.1502 data: 0.0698 max mem: 9377 +Train: [81] [4600/6250] eta: 0:04:12 lr: 0.000011 grad: 0.1368 (0.1376) loss: 0.8104 (0.8092) time: 0.1522 data: 0.0699 max mem: 9377 +Train: [81] [4700/6250] eta: 0:03:57 lr: 0.000011 grad: 0.1292 (0.1376) loss: 0.8112 (0.8091) time: 0.1715 data: 0.0881 max mem: 9377 +Train: [81] [4800/6250] eta: 0:03:41 lr: 0.000011 grad: 0.1396 (0.1377) loss: 0.8045 (0.8091) time: 0.1565 data: 0.0752 max mem: 9377 +Train: [81] [4900/6250] eta: 0:03:26 lr: 0.000011 grad: 0.1379 (0.1377) loss: 0.8093 (0.8090) time: 0.1353 data: 0.0540 max mem: 9377 +Train: [81] [5000/6250] eta: 0:03:10 lr: 0.000011 grad: 0.1348 (0.1377) loss: 0.8083 (0.8090) time: 0.1526 data: 0.0724 max mem: 9377 +Train: [81] [5100/6250] eta: 0:02:55 lr: 0.000011 grad: 0.1317 (0.1377) loss: 0.8115 (0.8089) time: 0.1336 data: 0.0532 max mem: 9377 +Train: [81] [5200/6250] eta: 0:02:40 lr: 0.000011 grad: 0.1378 (0.1377) loss: 0.8034 (0.8089) time: 0.1648 data: 0.0826 max mem: 9377 +Train: [81] [5300/6250] eta: 0:02:24 lr: 0.000011 grad: 0.1423 (0.1377) loss: 0.7993 (0.8089) time: 0.1550 data: 0.0780 max mem: 9377 +Train: [81] [5400/6250] eta: 0:02:09 lr: 0.000011 grad: 0.1335 (0.1378) loss: 0.8070 (0.8088) time: 0.1154 data: 0.0279 max mem: 9377 +Train: [81] [5500/6250] eta: 0:01:54 lr: 0.000011 grad: 0.1384 (0.1378) loss: 0.8076 (0.8088) time: 0.2364 data: 0.1600 max mem: 9377 +Train: [81] [5600/6250] eta: 0:01:39 lr: 0.000011 grad: 0.1343 (0.1379) loss: 0.8101 (0.8087) time: 0.1751 data: 0.0960 max mem: 9377 +Train: [81] [5700/6250] eta: 0:01:24 lr: 0.000011 grad: 0.1381 (0.1379) loss: 0.8077 (0.8087) time: 0.1354 data: 0.0492 max mem: 9377 +Train: [81] [5800/6250] eta: 0:01:08 lr: 0.000011 grad: 0.1329 (0.1380) loss: 0.8003 (0.8086) time: 0.1498 data: 0.0672 max mem: 9377 +Train: [81] [5900/6250] eta: 0:00:53 lr: 0.000011 grad: 0.1340 (0.1380) loss: 0.8129 (0.8086) time: 0.1547 data: 0.0766 max mem: 9377 +Train: [81] [6000/6250] eta: 0:00:38 lr: 0.000011 grad: 0.1467 (0.1382) loss: 0.8033 (0.8085) time: 0.1708 data: 0.0793 max mem: 9377 +Train: [81] [6100/6250] eta: 0:00:22 lr: 0.000011 grad: 0.1314 (0.1383) loss: 0.8047 (0.8084) time: 0.1466 data: 0.0655 max mem: 9377 +Train: [81] [6200/6250] eta: 0:00:07 lr: 0.000011 grad: 0.1390 (0.1385) loss: 0.8019 (0.8084) time: 0.1423 data: 0.0552 max mem: 9377 +Train: [81] [6249/6250] eta: 0:00:00 lr: 0.000011 grad: 0.1359 (0.1385) loss: 0.8119 (0.8083) time: 0.1993 data: 0.1235 max mem: 9377 +Train: [81] Total time: 0:16:04 (0.1543 s / it) +Averaged stats: lr: 0.000011 grad: 0.1359 (0.1385) loss: 0.8119 (0.8083) +Eval (hcp-train-subset): [81] [ 0/62] eta: 0:03:46 loss: 0.8222 (0.8222) time: 3.6546 data: 3.5528 max mem: 9377 +Eval (hcp-train-subset): [81] [61/62] eta: 0:00:00 loss: 0.8142 (0.8154) time: 0.1278 data: 0.1029 max mem: 9377 +Eval (hcp-train-subset): [81] Total time: 0:00:13 (0.2224 s / it) +Averaged stats (hcp-train-subset): loss: 0.8142 (0.8154) +Eval (hcp-val): [81] [ 0/62] eta: 0:05:30 loss: 0.8238 (0.8238) time: 5.3370 data: 5.2909 max mem: 9377 +Eval (hcp-val): [81] [61/62] eta: 0:00:00 loss: 0.8285 (0.8308) time: 0.1139 data: 0.0872 max mem: 9377 +Eval (hcp-val): [81] Total time: 0:00:13 (0.2179 s / it) +Averaged stats (hcp-val): loss: 0.8285 (0.8308) +Eval (nsd-val): [81] [ 0/62] eta: 0:04:00 loss: 0.8067 (0.8067) time: 3.8726 data: 3.7963 max mem: 9377 +Eval (nsd-val): [81] [61/62] eta: 0:00:00 loss: 0.8118 (0.8149) time: 0.0993 data: 0.0725 max mem: 9377 +Eval (nsd-val): [81] Total time: 0:00:13 (0.2113 s / it) +Averaged stats (nsd-val): loss: 0.8118 (0.8149) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +Train: [82] [ 0/6250] eta: 8:54:08 lr: 0.000011 grad: 0.2265 (0.2265) loss: 0.7991 (0.7991) time: 5.1277 data: 4.8718 max mem: 9377 +Train: [82] [ 100/6250] eta: 0:19:51 lr: 0.000011 grad: 0.1474 (0.1609) loss: 0.8342 (0.8233) time: 0.1336 data: 0.0425 max mem: 9377 +Train: [82] [ 200/6250] eta: 0:16:53 lr: 0.000011 grad: 0.1270 (0.1513) loss: 0.8225 (0.8212) time: 0.1391 data: 0.0306 max mem: 9377 +Train: [82] [ 300/6250] eta: 0:15:44 lr: 0.000011 grad: 0.1204 (0.1456) loss: 0.8248 (0.8216) time: 0.1491 data: 0.0618 max mem: 9377 +Train: [82] [ 400/6250] eta: 0:14:48 lr: 0.000011 grad: 0.1416 (0.1435) loss: 0.8053 (0.8208) time: 0.1192 data: 0.0245 max mem: 9377 +Train: [82] [ 500/6250] eta: 0:14:23 lr: 0.000011 grad: 0.1330 (0.1434) loss: 0.8183 (0.8193) time: 0.1479 data: 0.0605 max mem: 9377 +Train: [82] [ 600/6250] eta: 0:14:06 lr: 0.000011 grad: 0.1270 (0.1431) loss: 0.8175 (0.8181) time: 0.1290 data: 0.0383 max mem: 9377 +Train: [82] [ 700/6250] eta: 0:13:53 lr: 0.000011 grad: 0.1361 (0.1428) loss: 0.8141 (0.8170) time: 0.1407 data: 0.0473 max mem: 9377 +Train: [82] [ 800/6250] eta: 0:13:32 lr: 0.000011 grad: 0.1289 (0.1427) loss: 0.8155 (0.8160) time: 0.1199 data: 0.0313 max mem: 9377 +Train: [82] [ 900/6250] eta: 0:13:15 lr: 0.000011 grad: 0.1346 (0.1427) loss: 0.8114 (0.8152) time: 0.1345 data: 0.0383 max mem: 9377 +Train: [82] [1000/6250] eta: 0:13:07 lr: 0.000011 grad: 0.1340 (0.1428) loss: 0.8110 (0.8145) time: 0.1756 data: 0.0838 max mem: 9377 +Train: [82] [1100/6250] eta: 0:13:02 lr: 0.000011 grad: 0.1373 (0.1425) loss: 0.8096 (0.8140) time: 0.1506 data: 0.0590 max mem: 9377 +Train: [82] [1200/6250] eta: 0:12:50 lr: 0.000011 grad: 0.1379 (0.1424) loss: 0.8061 (0.8134) time: 0.1355 data: 0.0512 max mem: 9377 +Train: [82] [1300/6250] eta: 0:12:33 lr: 0.000011 grad: 0.1360 (0.1423) loss: 0.8119 (0.8129) time: 0.1337 data: 0.0536 max mem: 9377 +Train: [82] [1400/6250] eta: 0:12:16 lr: 0.000010 grad: 0.1377 (0.1424) loss: 0.8006 (0.8125) time: 0.1592 data: 0.0796 max mem: 9377 +Train: [82] [1500/6250] eta: 0:11:59 lr: 0.000010 grad: 0.1351 (0.1421) loss: 0.8126 (0.8122) time: 0.1264 data: 0.0484 max mem: 9377 +Train: [82] [1600/6250] eta: 0:11:43 lr: 0.000010 grad: 0.1409 (0.1421) loss: 0.8097 (0.8118) time: 0.1489 data: 0.0695 max mem: 9377 +Train: [82] [1700/6250] eta: 0:11:26 lr: 0.000010 grad: 0.1370 (0.1422) loss: 0.8076 (0.8114) time: 0.1500 data: 0.0681 max mem: 9377 +Train: [82] [1800/6250] eta: 0:11:08 lr: 0.000010 grad: 0.1342 (0.1422) loss: 0.8047 (0.8110) time: 0.1367 data: 0.0517 max mem: 9377 +Train: [82] [1900/6250] eta: 0:10:49 lr: 0.000010 grad: 0.1344 (0.1421) loss: 0.8028 (0.8107) time: 0.1213 data: 0.0343 max mem: 9377 +Train: [82] [2000/6250] eta: 0:10:30 lr: 0.000010 grad: 0.1428 (0.1422) loss: 0.8099 (0.8106) time: 0.1383 data: 0.0567 max mem: 9377 +Train: [82] [2100/6250] eta: 0:10:12 lr: 0.000010 grad: 0.1347 (0.1421) loss: 0.8099 (0.8105) time: 0.1321 data: 0.0475 max mem: 9377 +Train: [82] [2200/6250] eta: 0:09:55 lr: 0.000010 grad: 0.1344 (0.1422) loss: 0.8054 (0.8103) time: 0.1379 data: 0.0480 max mem: 9377 +Train: [82] [2300/6250] eta: 0:09:40 lr: 0.000010 grad: 0.1433 (0.1422) loss: 0.8010 (0.8101) time: 0.1624 data: 0.0808 max mem: 9377 +Train: [82] [2400/6250] eta: 0:09:25 lr: 0.000010 grad: 0.1431 (0.1421) loss: 0.8103 (0.8100) time: 0.1477 data: 0.0636 max mem: 9377 +Train: [82] [2500/6250] eta: 0:09:08 lr: 0.000010 grad: 0.1419 (0.1421) loss: 0.8040 (0.8098) time: 0.1278 data: 0.0399 max mem: 9377 +Train: [82] [2600/6250] eta: 0:08:54 lr: 0.000010 grad: 0.1395 (0.1420) loss: 0.7962 (0.8097) time: 0.1533 data: 0.0699 max mem: 9377 +Train: [82] [2700/6250] eta: 0:08:38 lr: 0.000010 grad: 0.1439 (0.1421) loss: 0.8017 (0.8096) time: 0.1300 data: 0.0455 max mem: 9377 +Train: [82] [2800/6250] eta: 0:08:24 lr: 0.000010 grad: 0.1394 (0.1422) loss: 0.8071 (0.8095) time: 0.1545 data: 0.0745 max mem: 9377 +Train: [82] [2900/6250] eta: 0:08:09 lr: 0.000010 grad: 0.1384 (0.1422) loss: 0.8188 (0.8095) time: 0.1740 data: 0.0883 max mem: 9377 +Train: [82] [3000/6250] eta: 0:07:54 lr: 0.000010 grad: 0.1406 (0.1421) loss: 0.8066 (0.8094) time: 0.1498 data: 0.0695 max mem: 9377 +Train: [82] [3100/6250] eta: 0:07:38 lr: 0.000010 grad: 0.1331 (0.1419) loss: 0.8166 (0.8093) time: 0.1688 data: 0.0864 max mem: 9377 +Train: [82] [3200/6250] eta: 0:07:23 lr: 0.000010 grad: 0.1386 (0.1418) loss: 0.8115 (0.8092) time: 0.1557 data: 0.0757 max mem: 9377 +Train: [82] [3300/6250] eta: 0:07:08 lr: 0.000010 grad: 0.1427 (0.1418) loss: 0.8069 (0.8092) time: 0.1332 data: 0.0493 max mem: 9377 +Train: [82] [3400/6250] eta: 0:06:53 lr: 0.000010 grad: 0.1422 (0.1417) loss: 0.8113 (0.8093) time: 0.1315 data: 0.0490 max mem: 9377 +Train: [82] [3500/6250] eta: 0:06:39 lr: 0.000010 grad: 0.1321 (0.1416) loss: 0.8146 (0.8093) time: 0.1413 data: 0.0630 max mem: 9377 +Train: [82] [3600/6250] eta: 0:06:25 lr: 0.000010 grad: 0.1329 (0.1414) loss: 0.8032 (0.8093) time: 0.1417 data: 0.0590 max mem: 9377 +Train: [82] [3700/6250] eta: 0:06:10 lr: 0.000010 grad: 0.1272 (0.1412) loss: 0.8049 (0.8094) time: 0.1320 data: 0.0462 max mem: 9377 +Train: [82] [3800/6250] eta: 0:05:56 lr: 0.000010 grad: 0.1289 (0.1410) loss: 0.8142 (0.8094) time: 0.1419 data: 0.0548 max mem: 9377 +Train: [82] [3900/6250] eta: 0:05:41 lr: 0.000010 grad: 0.1366 (0.1409) loss: 0.8075 (0.8094) time: 0.1499 data: 0.0691 max mem: 9377 +Train: [82] [4000/6250] eta: 0:05:27 lr: 0.000010 grad: 0.1338 (0.1409) loss: 0.8176 (0.8095) time: 0.1313 data: 0.0535 max mem: 9377 +Train: [82] [4100/6250] eta: 0:05:13 lr: 0.000010 grad: 0.1376 (0.1409) loss: 0.8074 (0.8095) time: 0.1416 data: 0.0553 max mem: 9377 +Train: [82] [4200/6250] eta: 0:04:58 lr: 0.000010 grad: 0.1371 (0.1409) loss: 0.8072 (0.8094) time: 0.1398 data: 0.0520 max mem: 9377 +Train: [82] [4300/6250] eta: 0:04:43 lr: 0.000010 grad: 0.1352 (0.1408) loss: 0.8124 (0.8094) time: 0.1432 data: 0.0553 max mem: 9377 +Train: [82] [4400/6250] eta: 0:04:29 lr: 0.000010 grad: 0.1294 (0.1407) loss: 0.8111 (0.8093) time: 0.1465 data: 0.0643 max mem: 9377 +Train: [82] [4500/6250] eta: 0:04:14 lr: 0.000010 grad: 0.1293 (0.1407) loss: 0.8099 (0.8093) time: 0.1389 data: 0.0520 max mem: 9377 +Train: [82] [4600/6250] eta: 0:04:00 lr: 0.000010 grad: 0.1322 (0.1406) loss: 0.8167 (0.8093) time: 0.1051 data: 0.0176 max mem: 9377 +Train: [82] [4700/6250] eta: 0:03:45 lr: 0.000010 grad: 0.1324 (0.1406) loss: 0.8124 (0.8092) time: 0.1254 data: 0.0375 max mem: 9377 +Train: [82] [4800/6250] eta: 0:03:30 lr: 0.000010 grad: 0.1285 (0.1406) loss: 0.8087 (0.8092) time: 0.1364 data: 0.0459 max mem: 9377 +Train: [82] [4900/6250] eta: 0:03:16 lr: 0.000010 grad: 0.1357 (0.1406) loss: 0.8090 (0.8091) time: 0.1519 data: 0.0696 max mem: 9377 +Train: [82] [5000/6250] eta: 0:03:01 lr: 0.000010 grad: 0.1316 (0.1406) loss: 0.8019 (0.8090) time: 0.1311 data: 0.0479 max mem: 9377 +Train: [82] [5100/6250] eta: 0:02:47 lr: 0.000010 grad: 0.1305 (0.1405) loss: 0.8044 (0.8090) time: 0.1500 data: 0.0677 max mem: 9377 +Train: [82] [5200/6250] eta: 0:02:32 lr: 0.000010 grad: 0.1389 (0.1406) loss: 0.8056 (0.8089) time: 0.1362 data: 0.0599 max mem: 9377 +Train: [82] [5300/6250] eta: 0:02:17 lr: 0.000010 grad: 0.1344 (0.1405) loss: 0.8077 (0.8089) time: 0.1352 data: 0.0557 max mem: 9377 +Train: [82] [5400/6250] eta: 0:02:03 lr: 0.000010 grad: 0.1219 (0.1405) loss: 0.8118 (0.8089) time: 0.1948 data: 0.1228 max mem: 9377 +Train: [82] [5500/6250] eta: 0:01:49 lr: 0.000010 grad: 0.1322 (0.1405) loss: 0.8093 (0.8089) time: 0.1258 data: 0.0420 max mem: 9377 +Train: [82] [5600/6250] eta: 0:01:34 lr: 0.000010 grad: 0.1381 (0.1405) loss: 0.8020 (0.8089) time: 0.1690 data: 0.0868 max mem: 9377 +Train: [82] [5700/6250] eta: 0:01:20 lr: 0.000010 grad: 0.1367 (0.1405) loss: 0.8144 (0.8089) time: 0.1413 data: 0.0624 max mem: 9377 +Train: [82] [5800/6250] eta: 0:01:05 lr: 0.000010 grad: 0.1387 (0.1405) loss: 0.8127 (0.8089) time: 0.1186 data: 0.0364 max mem: 9377 +Train: [82] [5900/6250] eta: 0:00:50 lr: 0.000010 grad: 0.1384 (0.1405) loss: 0.8060 (0.8089) time: 0.1538 data: 0.0731 max mem: 9377 +Train: [82] [6000/6250] eta: 0:00:36 lr: 0.000010 grad: 0.1438 (0.1404) loss: 0.8023 (0.8089) time: 0.1549 data: 0.0728 max mem: 9377 +Train: [82] [6100/6250] eta: 0:00:21 lr: 0.000010 grad: 0.1454 (0.1404) loss: 0.8079 (0.8089) time: 0.1514 data: 0.0600 max mem: 9377 +Train: [82] [6200/6250] eta: 0:00:07 lr: 0.000010 grad: 0.1475 (0.1405) loss: 0.8050 (0.8089) time: 0.1555 data: 0.0717 max mem: 9377 +Train: [82] [6249/6250] eta: 0:00:00 lr: 0.000010 grad: 0.1477 (0.1405) loss: 0.8042 (0.8088) time: 0.1174 data: 0.0290 max mem: 9377 +Train: [82] Total time: 0:15:16 (0.1466 s / it) +Averaged stats: lr: 0.000010 grad: 0.1477 (0.1405) loss: 0.8042 (0.8088) +Eval (hcp-train-subset): [82] [ 0/62] eta: 0:05:30 loss: 0.8202 (0.8202) time: 5.3273 data: 5.2971 max mem: 9377 +Eval (hcp-train-subset): [82] [61/62] eta: 0:00:00 loss: 0.8145 (0.8147) time: 0.0978 data: 0.0725 max mem: 9377 +Eval (hcp-train-subset): [82] Total time: 0:00:14 (0.2417 s / it) +Averaged stats (hcp-train-subset): loss: 0.8145 (0.8147) +Eval (hcp-val): [82] [ 0/62] eta: 0:04:28 loss: 0.8269 (0.8269) time: 4.3323 data: 4.2473 max mem: 9377 +Eval (hcp-val): [82] [61/62] eta: 0:00:00 loss: 0.8300 (0.8312) time: 0.1356 data: 0.1086 max mem: 9377 +Eval (hcp-val): [82] Total time: 0:00:14 (0.2381 s / it) +Averaged stats (hcp-val): loss: 0.8300 (0.8312) +Eval (nsd-val): [82] [ 0/62] eta: 0:05:31 loss: 0.8037 (0.8037) time: 5.3528 data: 5.3208 max mem: 9377 +Eval (nsd-val): [82] [61/62] eta: 0:00:00 loss: 0.8161 (0.8183) time: 0.1312 data: 0.1056 max mem: 9377 +Eval (nsd-val): [82] Total time: 0:00:14 (0.2329 s / it) +Averaged stats (nsd-val): loss: 0.8161 (0.8183) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +Train: [83] [ 0/6250] eta: 9:53:33 lr: 0.000010 grad: 0.0861 (0.0861) loss: 0.8766 (0.8766) time: 5.6981 data: 5.3146 max mem: 9377 +Train: [83] [ 100/6250] eta: 0:22:11 lr: 0.000010 grad: 0.1227 (0.1482) loss: 0.8304 (0.8319) time: 0.1616 data: 0.0558 max mem: 9377 +Train: [83] [ 200/6250] eta: 0:19:21 lr: 0.000010 grad: 0.1622 (0.1432) loss: 0.8181 (0.8287) time: 0.1389 data: 0.0294 max mem: 9377 +Train: [83] [ 300/6250] eta: 0:18:02 lr: 0.000010 grad: 0.1371 (0.1445) loss: 0.8045 (0.8240) time: 0.1526 data: 0.0626 max mem: 9377 +Train: [83] [ 400/6250] eta: 0:17:02 lr: 0.000010 grad: 0.1421 (0.1454) loss: 0.8127 (0.8211) time: 0.1640 data: 0.0666 max mem: 9377 +Train: [83] [ 500/6250] eta: 0:16:09 lr: 0.000010 grad: 0.1334 (0.1461) loss: 0.8110 (0.8190) time: 0.1482 data: 0.0531 max mem: 9377 +Train: [83] [ 600/6250] eta: 0:15:42 lr: 0.000010 grad: 0.1347 (0.1456) loss: 0.8141 (0.8174) time: 0.1573 data: 0.0634 max mem: 9377 +Train: [83] [ 700/6250] eta: 0:15:28 lr: 0.000009 grad: 0.1433 (0.1461) loss: 0.8062 (0.8160) time: 0.1638 data: 0.0663 max mem: 9377 +Train: [83] [ 800/6250] eta: 0:15:16 lr: 0.000009 grad: 0.1368 (0.1457) loss: 0.8137 (0.8154) time: 0.1552 data: 0.0636 max mem: 9377 +Train: [83] [ 900/6250] eta: 0:15:17 lr: 0.000009 grad: 0.1251 (0.1447) loss: 0.8167 (0.8153) time: 0.2218 data: 0.1267 max mem: 9377 +Train: [83] [1000/6250] eta: 0:14:54 lr: 0.000009 grad: 0.1339 (0.1446) loss: 0.8060 (0.8147) time: 0.1520 data: 0.0621 max mem: 9377 +Train: [83] [1100/6250] eta: 0:14:27 lr: 0.000009 grad: 0.1318 (0.1441) loss: 0.8082 (0.8142) time: 0.1376 data: 0.0472 max mem: 9377 +Train: [83] [1200/6250] eta: 0:14:02 lr: 0.000009 grad: 0.1432 (0.1435) loss: 0.8112 (0.8138) time: 0.1530 data: 0.0637 max mem: 9377 +Train: [83] [1300/6250] eta: 0:13:38 lr: 0.000009 grad: 0.1355 (0.1435) loss: 0.8053 (0.8134) time: 0.1304 data: 0.0465 max mem: 9377 +Train: [83] [1400/6250] eta: 0:13:16 lr: 0.000009 grad: 0.1345 (0.1431) loss: 0.8096 (0.8130) time: 0.1475 data: 0.0622 max mem: 9377 +Train: [83] [1500/6250] eta: 0:12:57 lr: 0.000009 grad: 0.1401 (0.1430) loss: 0.8147 (0.8126) time: 0.1627 data: 0.0736 max mem: 9377 +Train: [83] [1600/6250] eta: 0:12:35 lr: 0.000009 grad: 0.1303 (0.1427) loss: 0.8051 (0.8122) time: 0.1392 data: 0.0493 max mem: 9377 +Train: [83] [1700/6250] eta: 0:12:14 lr: 0.000009 grad: 0.1402 (0.1426) loss: 0.8102 (0.8120) time: 0.1421 data: 0.0633 max mem: 9377 +Train: [83] [1800/6250] eta: 0:11:56 lr: 0.000009 grad: 0.1349 (0.1423) loss: 0.8115 (0.8116) time: 0.1867 data: 0.1109 max mem: 9377 +Train: [83] [1900/6250] eta: 0:11:41 lr: 0.000009 grad: 0.1357 (0.1422) loss: 0.8021 (0.8114) time: 0.1637 data: 0.0819 max mem: 9377 +Train: [83] [2000/6250] eta: 0:11:22 lr: 0.000009 grad: 0.1425 (0.1423) loss: 0.8050 (0.8110) time: 0.1433 data: 0.0586 max mem: 9377 +Train: [83] [2100/6250] eta: 0:11:07 lr: 0.000009 grad: 0.1410 (0.1424) loss: 0.8025 (0.8105) time: 0.1744 data: 0.0930 max mem: 9377 +Train: [83] [2200/6250] eta: 0:10:53 lr: 0.000009 grad: 0.1315 (0.1423) loss: 0.8087 (0.8102) time: 0.1512 data: 0.0675 max mem: 9377 +Train: [83] [2300/6250] eta: 0:10:37 lr: 0.000009 grad: 0.1417 (0.1424) loss: 0.7936 (0.8099) time: 0.1578 data: 0.0750 max mem: 9377 +Train: [83] [2400/6250] eta: 0:10:18 lr: 0.000009 grad: 0.1430 (0.1424) loss: 0.8029 (0.8095) time: 0.1407 data: 0.0603 max mem: 9377 +Train: [83] [2500/6250] eta: 0:10:00 lr: 0.000009 grad: 0.1315 (0.1424) loss: 0.8095 (0.8093) time: 0.1324 data: 0.0468 max mem: 9377 +Train: [83] [2600/6250] eta: 0:09:41 lr: 0.000009 grad: 0.1341 (0.1425) loss: 0.8054 (0.8090) time: 0.1300 data: 0.0492 max mem: 9377 +Train: [83] [2700/6250] eta: 0:09:23 lr: 0.000009 grad: 0.1404 (0.1425) loss: 0.8055 (0.8088) time: 0.1544 data: 0.0754 max mem: 9377 +Train: [83] [2800/6250] eta: 0:09:05 lr: 0.000009 grad: 0.1443 (0.1423) loss: 0.8042 (0.8087) time: 0.1490 data: 0.0706 max mem: 9377 +Train: [83] [2900/6250] eta: 0:08:47 lr: 0.000009 grad: 0.1327 (0.1424) loss: 0.8023 (0.8085) time: 0.1433 data: 0.0558 max mem: 9377 +Train: [83] [3000/6250] eta: 0:08:30 lr: 0.000009 grad: 0.1383 (0.1423) loss: 0.8050 (0.8084) time: 0.1411 data: 0.0566 max mem: 9377 +Train: [83] [3100/6250] eta: 0:08:13 lr: 0.000009 grad: 0.1349 (0.1422) loss: 0.8027 (0.8083) time: 0.1495 data: 0.0694 max mem: 9377 +Train: [83] [3200/6250] eta: 0:07:56 lr: 0.000009 grad: 0.1468 (0.1423) loss: 0.8019 (0.8081) time: 0.1427 data: 0.0604 max mem: 9377 +Train: [83] [3300/6250] eta: 0:07:39 lr: 0.000009 grad: 0.1461 (0.1425) loss: 0.8019 (0.8079) time: 0.1114 data: 0.0209 max mem: 9377 +Train: [83] [3400/6250] eta: 0:07:22 lr: 0.000009 grad: 0.1391 (0.1424) loss: 0.7995 (0.8078) time: 0.1281 data: 0.0398 max mem: 9377 +Train: [83] [3500/6250] eta: 0:07:05 lr: 0.000009 grad: 0.1390 (0.1424) loss: 0.8054 (0.8077) time: 0.1312 data: 0.0475 max mem: 9377 +Train: [83] [3600/6250] eta: 0:06:49 lr: 0.000009 grad: 0.1383 (0.1424) loss: 0.8007 (0.8076) time: 0.1259 data: 0.0477 max mem: 9377 +Train: [83] [3700/6250] eta: 0:06:32 lr: 0.000009 grad: 0.1471 (0.1423) loss: 0.8056 (0.8075) time: 0.1424 data: 0.0585 max mem: 9377 +Train: [83] [3800/6250] eta: 0:06:16 lr: 0.000009 grad: 0.1359 (0.1423) loss: 0.7997 (0.8074) time: 0.1374 data: 0.0514 max mem: 9377 +Train: [83] [3900/6250] eta: 0:05:59 lr: 0.000009 grad: 0.1367 (0.1423) loss: 0.8070 (0.8073) time: 0.1488 data: 0.0662 max mem: 9377 +Train: [83] [4000/6250] eta: 0:05:43 lr: 0.000009 grad: 0.1385 (0.1424) loss: 0.8044 (0.8071) time: 0.1500 data: 0.0707 max mem: 9377 +Train: [83] [4100/6250] eta: 0:05:27 lr: 0.000009 grad: 0.1462 (0.1425) loss: 0.8047 (0.8070) time: 0.1477 data: 0.0672 max mem: 9377 +Train: [83] [4200/6250] eta: 0:05:12 lr: 0.000009 grad: 0.1514 (0.1426) loss: 0.8024 (0.8068) time: 0.1372 data: 0.0579 max mem: 9377 +Train: [83] [4300/6250] eta: 0:04:56 lr: 0.000009 grad: 0.1491 (0.1428) loss: 0.8122 (0.8067) time: 0.1439 data: 0.0582 max mem: 9377 +Train: [83] [4400/6250] eta: 0:04:41 lr: 0.000009 grad: 0.1456 (0.1429) loss: 0.8057 (0.8066) time: 0.1138 data: 0.0344 max mem: 9377 +Train: [83] [4500/6250] eta: 0:04:25 lr: 0.000009 grad: 0.1383 (0.1430) loss: 0.7978 (0.8065) time: 0.1166 data: 0.0307 max mem: 9377 +Train: [83] [4600/6250] eta: 0:04:09 lr: 0.000009 grad: 0.1395 (0.1431) loss: 0.8087 (0.8064) time: 0.1508 data: 0.0676 max mem: 9377 +Train: [83] [4700/6250] eta: 0:03:54 lr: 0.000009 grad: 0.1434 (0.1432) loss: 0.8025 (0.8064) time: 0.1319 data: 0.0451 max mem: 9377 +Train: [83] [4800/6250] eta: 0:03:38 lr: 0.000009 grad: 0.1312 (0.1432) loss: 0.8048 (0.8063) time: 0.1322 data: 0.0491 max mem: 9377 +Train: [83] [4900/6250] eta: 0:03:23 lr: 0.000009 grad: 0.1472 (0.1432) loss: 0.8024 (0.8063) time: 0.1417 data: 0.0633 max mem: 9377 +Train: [83] [5000/6250] eta: 0:03:08 lr: 0.000009 grad: 0.1444 (0.1433) loss: 0.8029 (0.8063) time: 0.1315 data: 0.0489 max mem: 9377 +Train: [83] [5100/6250] eta: 0:02:52 lr: 0.000009 grad: 0.1370 (0.1432) loss: 0.8078 (0.8063) time: 0.1384 data: 0.0527 max mem: 9377 +Train: [83] [5200/6250] eta: 0:02:37 lr: 0.000009 grad: 0.1393 (0.1432) loss: 0.8050 (0.8063) time: 0.1581 data: 0.0784 max mem: 9377 +Train: [83] [5300/6250] eta: 0:02:23 lr: 0.000009 grad: 0.1441 (0.1432) loss: 0.8062 (0.8062) time: 0.1413 data: 0.0539 max mem: 9377 +Train: [83] [5400/6250] eta: 0:02:08 lr: 0.000009 grad: 0.1445 (0.1432) loss: 0.8086 (0.8062) time: 0.1703 data: 0.0910 max mem: 9377 +Train: [83] [5500/6250] eta: 0:01:53 lr: 0.000009 grad: 0.1396 (0.1431) loss: 0.8146 (0.8062) time: 0.1539 data: 0.0749 max mem: 9377 +Train: [83] [5600/6250] eta: 0:01:38 lr: 0.000009 grad: 0.1416 (0.1431) loss: 0.8005 (0.8062) time: 0.1415 data: 0.0540 max mem: 9377 +Train: [83] [5700/6250] eta: 0:01:23 lr: 0.000009 grad: 0.1367 (0.1432) loss: 0.8157 (0.8062) time: 0.1444 data: 0.0618 max mem: 9377 +Train: [83] [5800/6250] eta: 0:01:07 lr: 0.000009 grad: 0.1448 (0.1432) loss: 0.8053 (0.8061) time: 0.1594 data: 0.0797 max mem: 9377 +Train: [83] [5900/6250] eta: 0:00:52 lr: 0.000009 grad: 0.1391 (0.1433) loss: 0.8058 (0.8061) time: 0.1369 data: 0.0468 max mem: 9377 +Train: [83] [6000/6250] eta: 0:00:37 lr: 0.000009 grad: 0.1382 (0.1433) loss: 0.8000 (0.8060) time: 0.1563 data: 0.0750 max mem: 9377 +Train: [83] [6100/6250] eta: 0:00:22 lr: 0.000009 grad: 0.1388 (0.1436) loss: 0.8096 (0.8060) time: 0.1329 data: 0.0475 max mem: 9377 +Train: [83] [6200/6250] eta: 0:00:07 lr: 0.000009 grad: 0.1369 (0.1437) loss: 0.8067 (0.8060) time: 0.1323 data: 0.0523 max mem: 9377 +Train: [83] [6249/6250] eta: 0:00:00 lr: 0.000009 grad: 0.1391 (0.1436) loss: 0.8007 (0.8060) time: 0.1154 data: 0.0329 max mem: 9377 +Train: [83] Total time: 0:15:41 (0.1507 s / it) +Averaged stats: lr: 0.000009 grad: 0.1391 (0.1436) loss: 0.8007 (0.8060) +Eval (hcp-train-subset): [83] [ 0/62] eta: 0:05:45 loss: 0.8177 (0.8177) time: 5.5679 data: 5.5382 max mem: 9377 +Eval (hcp-train-subset): [83] [61/62] eta: 0:00:00 loss: 0.8152 (0.8142) time: 0.1121 data: 0.0856 max mem: 9377 +Eval (hcp-train-subset): [83] Total time: 0:00:13 (0.2128 s / it) +Averaged stats (hcp-train-subset): loss: 0.8152 (0.8142) +Eval (hcp-val): [83] [ 0/62] eta: 0:04:48 loss: 0.8263 (0.8263) time: 4.6460 data: 4.5799 max mem: 9377 +Eval (hcp-val): [83] [61/62] eta: 0:00:00 loss: 0.8299 (0.8306) time: 0.1184 data: 0.0933 max mem: 9377 +Eval (hcp-val): [83] Total time: 0:00:12 (0.2036 s / it) +Averaged stats (hcp-val): loss: 0.8299 (0.8306) +Eval (nsd-val): [83] [ 0/62] eta: 0:05:19 loss: 0.8070 (0.8070) time: 5.1611 data: 5.1313 max mem: 9377 +Eval (nsd-val): [83] [61/62] eta: 0:00:00 loss: 0.8134 (0.8159) time: 0.1030 data: 0.0763 max mem: 9377 +Eval (nsd-val): [83] Total time: 0:00:13 (0.2137 s / it) +Averaged stats (nsd-val): loss: 0.8134 (0.8159) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +Train: [84] [ 0/6250] eta: 8:00:18 lr: 0.000009 grad: 0.0970 (0.0970) loss: 0.8608 (0.8608) time: 4.6109 data: 4.3953 max mem: 9377 +Train: [84] [ 100/6250] eta: 0:19:47 lr: 0.000009 grad: 0.1277 (0.1761) loss: 0.8318 (0.8255) time: 0.1511 data: 0.0513 max mem: 9377 +Train: [84] [ 200/6250] eta: 0:17:04 lr: 0.000009 grad: 0.1385 (0.1594) loss: 0.8215 (0.8242) time: 0.1551 data: 0.0570 max mem: 9377 +Train: [84] [ 300/6250] eta: 0:15:58 lr: 0.000008 grad: 0.1242 (0.1524) loss: 0.8258 (0.8240) time: 0.1442 data: 0.0465 max mem: 9377 +Train: [84] [ 400/6250] eta: 0:15:28 lr: 0.000008 grad: 0.1366 (0.1494) loss: 0.8218 (0.8239) time: 0.1410 data: 0.0502 max mem: 9377 +Train: [84] [ 500/6250] eta: 0:15:04 lr: 0.000008 grad: 0.1399 (0.1486) loss: 0.8208 (0.8235) time: 0.1424 data: 0.0502 max mem: 9377 +Train: [84] [ 600/6250] eta: 0:14:37 lr: 0.000008 grad: 0.1535 (0.1482) loss: 0.8073 (0.8224) time: 0.1460 data: 0.0551 max mem: 9377 +Train: [84] [ 700/6250] eta: 0:14:18 lr: 0.000008 grad: 0.1386 (0.1485) loss: 0.8066 (0.8209) time: 0.1610 data: 0.0762 max mem: 9377 +Train: [84] [ 800/6250] eta: 0:14:15 lr: 0.000008 grad: 0.1521 (0.1486) loss: 0.8040 (0.8192) time: 0.1658 data: 0.0680 max mem: 9377 +Train: [84] [ 900/6250] eta: 0:13:59 lr: 0.000008 grad: 0.1448 (0.1488) loss: 0.8048 (0.8180) time: 0.1398 data: 0.0505 max mem: 9377 +Train: [84] [1000/6250] eta: 0:13:39 lr: 0.000008 grad: 0.1463 (0.1484) loss: 0.8103 (0.8171) time: 0.1480 data: 0.0673 max mem: 9377 +Train: [84] [1100/6250] eta: 0:13:20 lr: 0.000008 grad: 0.1434 (0.1483) loss: 0.8071 (0.8162) time: 0.1391 data: 0.0528 max mem: 9377 +Train: [84] [1200/6250] eta: 0:13:01 lr: 0.000008 grad: 0.1374 (0.1479) loss: 0.8108 (0.8155) time: 0.1384 data: 0.0623 max mem: 9377 +Train: [84] [1300/6250] eta: 0:12:39 lr: 0.000008 grad: 0.1435 (0.1474) loss: 0.8011 (0.8149) time: 0.1211 data: 0.0379 max mem: 9377 +Train: [84] [1400/6250] eta: 0:12:18 lr: 0.000008 grad: 0.1326 (0.1468) loss: 0.8124 (0.8145) time: 0.1382 data: 0.0563 max mem: 9377 +Train: [84] [1500/6250] eta: 0:11:56 lr: 0.000008 grad: 0.1361 (0.1465) loss: 0.8104 (0.8141) time: 0.1297 data: 0.0453 max mem: 9377 +Train: [84] [1600/6250] eta: 0:11:35 lr: 0.000008 grad: 0.1456 (0.1462) loss: 0.8051 (0.8137) time: 0.1261 data: 0.0361 max mem: 9377 +Train: [84] [1700/6250] eta: 0:11:17 lr: 0.000008 grad: 0.1400 (0.1461) loss: 0.8056 (0.8132) time: 0.1476 data: 0.0625 max mem: 9377 +Train: [84] [1800/6250] eta: 0:11:01 lr: 0.000008 grad: 0.1484 (0.1459) loss: 0.7962 (0.8127) time: 0.1290 data: 0.0480 max mem: 9377 +Train: [84] [1900/6250] eta: 0:10:48 lr: 0.000008 grad: 0.1465 (0.1456) loss: 0.7973 (0.8124) time: 0.1552 data: 0.0779 max mem: 9377 +Train: [84] [2000/6250] eta: 0:10:35 lr: 0.000008 grad: 0.1348 (0.1455) loss: 0.8089 (0.8122) time: 0.1545 data: 0.0760 max mem: 9377 +Train: [84] [2100/6250] eta: 0:10:20 lr: 0.000008 grad: 0.1293 (0.1452) loss: 0.8153 (0.8120) time: 0.1471 data: 0.0592 max mem: 9377 +Train: [84] [2200/6250] eta: 0:10:04 lr: 0.000008 grad: 0.1320 (0.1449) loss: 0.8095 (0.8118) time: 0.1467 data: 0.0664 max mem: 9377 +Train: [84] [2300/6250] eta: 0:09:50 lr: 0.000008 grad: 0.1394 (0.1445) loss: 0.8032 (0.8117) time: 0.1554 data: 0.0752 max mem: 9377 +Train: [84] [2400/6250] eta: 0:09:34 lr: 0.000008 grad: 0.1333 (0.1442) loss: 0.8080 (0.8116) time: 0.1710 data: 0.0877 max mem: 9377 +Train: [84] [2500/6250] eta: 0:09:18 lr: 0.000008 grad: 0.1383 (0.1440) loss: 0.8062 (0.8115) time: 0.1372 data: 0.0614 max mem: 9377 +Train: [84] [2600/6250] eta: 0:09:02 lr: 0.000008 grad: 0.1398 (0.1439) loss: 0.8045 (0.8115) time: 0.1211 data: 0.0359 max mem: 9377 +Train: [84] [2700/6250] eta: 0:08:47 lr: 0.000008 grad: 0.1407 (0.1437) loss: 0.8066 (0.8113) time: 0.1312 data: 0.0463 max mem: 9377 +Train: [84] [2800/6250] eta: 0:08:31 lr: 0.000008 grad: 0.1354 (0.1436) loss: 0.8085 (0.8112) time: 0.1438 data: 0.0627 max mem: 9377 +Train: [84] [2900/6250] eta: 0:08:16 lr: 0.000008 grad: 0.1313 (0.1435) loss: 0.8104 (0.8110) time: 0.1327 data: 0.0492 max mem: 9377 +Train: [84] [3000/6250] eta: 0:08:02 lr: 0.000008 grad: 0.1300 (0.1434) loss: 0.8090 (0.8109) time: 0.1677 data: 0.0918 max mem: 9377 +Train: [84] [3100/6250] eta: 0:07:47 lr: 0.000008 grad: 0.1346 (0.1432) loss: 0.8049 (0.8109) time: 0.1421 data: 0.0568 max mem: 9377 +Train: [84] [3200/6250] eta: 0:07:33 lr: 0.000008 grad: 0.1347 (0.1430) loss: 0.8108 (0.8108) time: 0.1572 data: 0.0732 max mem: 9377 +Train: [84] [3300/6250] eta: 0:07:18 lr: 0.000008 grad: 0.1369 (0.1429) loss: 0.8075 (0.8107) time: 0.1606 data: 0.0742 max mem: 9377 +Train: [84] [3400/6250] eta: 0:07:04 lr: 0.000008 grad: 0.1400 (0.1428) loss: 0.8072 (0.8107) time: 0.1171 data: 0.0278 max mem: 9377 +Train: [84] [3500/6250] eta: 0:06:48 lr: 0.000008 grad: 0.1476 (0.1429) loss: 0.8054 (0.8106) time: 0.1240 data: 0.0374 max mem: 9377 +Train: [84] [3600/6250] eta: 0:06:32 lr: 0.000008 grad: 0.1359 (0.1430) loss: 0.8049 (0.8105) time: 0.1293 data: 0.0460 max mem: 9377 +Train: [84] [3700/6250] eta: 0:06:17 lr: 0.000008 grad: 0.1352 (0.1431) loss: 0.8002 (0.8104) time: 0.1439 data: 0.0616 max mem: 9377 +Train: [84] [3800/6250] eta: 0:06:02 lr: 0.000008 grad: 0.1502 (0.1431) loss: 0.8039 (0.8103) time: 0.1498 data: 0.0682 max mem: 9377 +Train: [84] [3900/6250] eta: 0:05:47 lr: 0.000008 grad: 0.1409 (0.1431) loss: 0.8096 (0.8104) time: 0.1552 data: 0.0624 max mem: 9377 +Train: [84] [4000/6250] eta: 0:05:32 lr: 0.000008 grad: 0.1413 (0.1431) loss: 0.8137 (0.8103) time: 0.1545 data: 0.0739 max mem: 9377 +Train: [84] [4100/6250] eta: 0:05:17 lr: 0.000008 grad: 0.1330 (0.1432) loss: 0.8138 (0.8103) time: 0.1437 data: 0.0590 max mem: 9377 +Train: [84] [4200/6250] eta: 0:05:02 lr: 0.000008 grad: 0.1345 (0.1432) loss: 0.8141 (0.8102) time: 0.1485 data: 0.0694 max mem: 9377 +Train: [84] [4300/6250] eta: 0:04:47 lr: 0.000008 grad: 0.1442 (0.1432) loss: 0.8070 (0.8102) time: 0.1265 data: 0.0254 max mem: 9377 +Train: [84] [4400/6250] eta: 0:04:32 lr: 0.000008 grad: 0.1412 (0.1433) loss: 0.8135 (0.8101) time: 0.1502 data: 0.0721 max mem: 9377 +Train: [84] [4500/6250] eta: 0:04:17 lr: 0.000008 grad: 0.1428 (0.1434) loss: 0.8022 (0.8100) time: 0.1702 data: 0.0864 max mem: 9377 +Train: [84] [4600/6250] eta: 0:04:02 lr: 0.000008 grad: 0.1398 (0.1434) loss: 0.8137 (0.8099) time: 0.1407 data: 0.0589 max mem: 9377 +Train: [84] [4700/6250] eta: 0:03:47 lr: 0.000008 grad: 0.1439 (0.1434) loss: 0.8115 (0.8099) time: 0.1429 data: 0.0590 max mem: 9377 +Train: [84] [4800/6250] eta: 0:03:33 lr: 0.000008 grad: 0.1446 (0.1434) loss: 0.8077 (0.8099) time: 0.1399 data: 0.0626 max mem: 9377 +Train: [84] [4900/6250] eta: 0:03:18 lr: 0.000008 grad: 0.1498 (0.1435) loss: 0.8063 (0.8098) time: 0.1581 data: 0.0769 max mem: 9377 +Train: [84] [5000/6250] eta: 0:03:04 lr: 0.000008 grad: 0.1402 (0.1435) loss: 0.8114 (0.8098) time: 0.1998 data: 0.1085 max mem: 9377 +Train: [84] [5100/6250] eta: 0:02:49 lr: 0.000008 grad: 0.1327 (0.1435) loss: 0.8103 (0.8097) time: 0.1604 data: 0.0823 max mem: 9377 +Train: [84] [5200/6250] eta: 0:02:35 lr: 0.000008 grad: 0.1552 (0.1435) loss: 0.8068 (0.8096) time: 0.1505 data: 0.0723 max mem: 9377 +Train: [84] [5300/6250] eta: 0:02:20 lr: 0.000008 grad: 0.1432 (0.1436) loss: 0.7945 (0.8094) time: 0.1250 data: 0.0353 max mem: 9377 +Train: [84] [5400/6250] eta: 0:02:05 lr: 0.000008 grad: 0.1422 (0.1436) loss: 0.8084 (0.8094) time: 0.1448 data: 0.0652 max mem: 9377 +Train: [84] [5500/6250] eta: 0:01:50 lr: 0.000008 grad: 0.1410 (0.1436) loss: 0.8075 (0.8093) time: 0.1520 data: 0.0700 max mem: 9377 +Train: [84] [5600/6250] eta: 0:01:36 lr: 0.000008 grad: 0.1418 (0.1437) loss: 0.8106 (0.8093) time: 0.1358 data: 0.0510 max mem: 9377 +Train: [84] [5700/6250] eta: 0:01:21 lr: 0.000008 grad: 0.1379 (0.1437) loss: 0.8079 (0.8092) time: 0.1336 data: 0.0481 max mem: 9377 +Train: [84] [5800/6250] eta: 0:01:06 lr: 0.000008 grad: 0.1506 (0.1437) loss: 0.8116 (0.8091) time: 0.1332 data: 0.0505 max mem: 9377 +Train: [84] [5900/6250] eta: 0:00:51 lr: 0.000008 grad: 0.1387 (0.1437) loss: 0.8040 (0.8091) time: 0.1324 data: 0.0461 max mem: 9377 +Train: [84] [6000/6250] eta: 0:00:36 lr: 0.000008 grad: 0.1431 (0.1438) loss: 0.8021 (0.8090) time: 0.1184 data: 0.0389 max mem: 9377 +Train: [84] [6100/6250] eta: 0:00:22 lr: 0.000008 grad: 0.1390 (0.1438) loss: 0.8055 (0.8089) time: 0.1532 data: 0.0739 max mem: 9377 +Train: [84] [6200/6250] eta: 0:00:07 lr: 0.000008 grad: 0.1501 (0.1439) loss: 0.8040 (0.8088) time: 0.1328 data: 0.0441 max mem: 9377 +Train: [84] [6249/6250] eta: 0:00:00 lr: 0.000008 grad: 0.1388 (0.1439) loss: 0.8049 (0.8087) time: 0.1383 data: 0.0487 max mem: 9377 +Train: [84] Total time: 0:15:24 (0.1480 s / it) +Averaged stats: lr: 0.000008 grad: 0.1388 (0.1439) loss: 0.8049 (0.8087) +Eval (hcp-train-subset): [84] [ 0/62] eta: 0:04:12 loss: 0.8203 (0.8203) time: 4.0751 data: 3.9671 max mem: 9377 +Eval (hcp-train-subset): [84] [61/62] eta: 0:00:00 loss: 0.8132 (0.8129) time: 0.1280 data: 0.1025 max mem: 9377 +Eval (hcp-train-subset): [84] Total time: 0:00:14 (0.2376 s / it) +Averaged stats (hcp-train-subset): loss: 0.8132 (0.8129) +Making plots (hcp-train-subset): example=18 +Eval (hcp-val): [84] [ 0/62] eta: 0:04:07 loss: 0.8269 (0.8269) time: 3.9919 data: 3.9182 max mem: 9377 +Eval (hcp-val): [84] [61/62] eta: 0:00:00 loss: 0.8299 (0.8300) time: 0.1237 data: 0.0981 max mem: 9377 +Eval (hcp-val): [84] Total time: 0:00:13 (0.2222 s / it) +Averaged stats (hcp-val): loss: 0.8299 (0.8300) +Making plots (hcp-val): example=23 +Eval (nsd-val): [84] [ 0/62] eta: 0:05:57 loss: 0.8051 (0.8051) time: 5.7649 data: 5.7335 max mem: 9377 +Eval (nsd-val): [84] [61/62] eta: 0:00:00 loss: 0.8109 (0.8128) time: 0.1238 data: 0.0989 max mem: 9377 +Eval (nsd-val): [84] Total time: 0:00:20 (0.3281 s / it) +Averaged stats (nsd-val): loss: 0.8109 (0.8128) +Making plots (nsd-val): example=42 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-00084.pth +Train: [85] [ 0/6250] eta: 12:01:45 lr: 0.000008 grad: 0.1369 (0.1369) loss: 0.8297 (0.8297) time: 6.9290 data: 6.8119 max mem: 9377 +Train: [85] [ 100/6250] eta: 0:20:56 lr: 0.000008 grad: 0.1615 (0.1911) loss: 0.8108 (0.8074) time: 0.1544 data: 0.0545 max mem: 9377 +Train: [85] [ 200/6250] eta: 0:17:46 lr: 0.000008 grad: 0.1500 (0.1810) loss: 0.8133 (0.8087) time: 0.1420 data: 0.0505 max mem: 9377 +Train: [85] [ 300/6250] eta: 0:16:26 lr: 0.000007 grad: 0.1440 (0.1742) loss: 0.8063 (0.8081) time: 0.1398 data: 0.0469 max mem: 9377 +Train: [85] [ 400/6250] eta: 0:15:56 lr: 0.000007 grad: 0.1450 (0.1691) loss: 0.8061 (0.8092) time: 0.1986 data: 0.1001 max mem: 9377 +Train: [85] [ 500/6250] eta: 0:15:26 lr: 0.000007 grad: 0.1558 (0.1656) loss: 0.8133 (0.8099) time: 0.1480 data: 0.0678 max mem: 9377 +Train: [85] [ 600/6250] eta: 0:14:57 lr: 0.000007 grad: 0.1459 (0.1625) loss: 0.8049 (0.8099) time: 0.1279 data: 0.0445 max mem: 9377 +Train: [85] [ 700/6250] eta: 0:14:30 lr: 0.000007 grad: 0.1416 (0.1610) loss: 0.8166 (0.8098) time: 0.1419 data: 0.0527 max mem: 9377 +Train: [85] [ 800/6250] eta: 0:14:08 lr: 0.000007 grad: 0.1389 (0.1591) loss: 0.8181 (0.8105) time: 0.1297 data: 0.0401 max mem: 9377 +Train: [85] [ 900/6250] eta: 0:13:53 lr: 0.000007 grad: 0.1479 (0.1582) loss: 0.8018 (0.8106) time: 0.1559 data: 0.0680 max mem: 9377 +Train: [85] [1000/6250] eta: 0:13:36 lr: 0.000007 grad: 0.1457 (0.1576) loss: 0.8088 (0.8105) time: 0.1359 data: 0.0444 max mem: 9377 +Train: [85] [1100/6250] eta: 0:13:13 lr: 0.000007 grad: 0.1429 (0.1572) loss: 0.7986 (0.8103) time: 0.1128 data: 0.0209 max mem: 9377 +Train: [85] [1200/6250] eta: 0:12:52 lr: 0.000007 grad: 0.1442 (0.1564) loss: 0.8155 (0.8103) time: 0.1575 data: 0.0684 max mem: 9377 +Train: [85] [1300/6250] eta: 0:12:30 lr: 0.000007 grad: 0.1475 (0.1558) loss: 0.8096 (0.8101) time: 0.1316 data: 0.0417 max mem: 9377 +Train: [85] [1400/6250] eta: 0:12:12 lr: 0.000007 grad: 0.1455 (0.1552) loss: 0.8086 (0.8101) time: 0.1541 data: 0.0719 max mem: 9377 +Train: [85] [1500/6250] eta: 0:11:57 lr: 0.000007 grad: 0.1445 (0.1546) loss: 0.8118 (0.8099) time: 0.1378 data: 0.0609 max mem: 9377 +Train: [85] [1600/6250] eta: 0:11:43 lr: 0.000007 grad: 0.1403 (0.1540) loss: 0.8082 (0.8098) time: 0.1848 data: 0.1061 max mem: 9377 +Train: [85] [1700/6250] eta: 0:11:31 lr: 0.000007 grad: 0.1363 (0.1532) loss: 0.8152 (0.8099) time: 0.1806 data: 0.1043 max mem: 9377 +Train: [85] [1800/6250] eta: 0:11:21 lr: 0.000007 grad: 0.1361 (0.1527) loss: 0.8119 (0.8098) time: 0.1904 data: 0.1092 max mem: 9377 +Train: [85] [1900/6250] eta: 0:11:07 lr: 0.000007 grad: 0.1357 (0.1521) loss: 0.8143 (0.8099) time: 0.1398 data: 0.0640 max mem: 9377 +Train: [85] [2000/6250] eta: 0:10:54 lr: 0.000007 grad: 0.1352 (0.1514) loss: 0.8147 (0.8100) time: 0.1656 data: 0.0857 max mem: 9377 +Train: [85] [2100/6250] eta: 0:10:40 lr: 0.000007 grad: 0.1323 (0.1510) loss: 0.8169 (0.8100) time: 0.1598 data: 0.0823 max mem: 9377 +Train: [85] [2200/6250] eta: 0:10:28 lr: 0.000007 grad: 0.1503 (0.1507) loss: 0.8095 (0.8101) time: 0.1799 data: 0.0989 max mem: 9377 +Train: [85] [2300/6250] eta: 0:10:12 lr: 0.000007 grad: 0.1353 (0.1501) loss: 0.8100 (0.8102) time: 0.1510 data: 0.0720 max mem: 9377 +Train: [85] [2400/6250] eta: 0:09:57 lr: 0.000007 grad: 0.1337 (0.1498) loss: 0.8186 (0.8104) time: 0.1504 data: 0.0742 max mem: 9377 +Train: [85] [2500/6250] eta: 0:09:41 lr: 0.000007 grad: 0.1338 (0.1492) loss: 0.8166 (0.8106) time: 0.1668 data: 0.0879 max mem: 9377 +Train: [85] [2600/6250] eta: 0:09:24 lr: 0.000007 grad: 0.1315 (0.1487) loss: 0.8180 (0.8108) time: 0.1478 data: 0.0680 max mem: 9377 +Train: [85] [2700/6250] eta: 0:09:07 lr: 0.000007 grad: 0.1361 (0.1482) loss: 0.8157 (0.8110) time: 0.1431 data: 0.0577 max mem: 9377 +Train: [85] [2800/6250] eta: 0:08:50 lr: 0.000007 grad: 0.1348 (0.1479) loss: 0.8171 (0.8111) time: 0.1541 data: 0.0736 max mem: 9377 +Train: [85] [2900/6250] eta: 0:08:33 lr: 0.000007 grad: 0.1315 (0.1475) loss: 0.8155 (0.8113) time: 0.1447 data: 0.0525 max mem: 9377 +Train: [85] [3000/6250] eta: 0:08:17 lr: 0.000007 grad: 0.1352 (0.1471) loss: 0.8137 (0.8114) time: 0.1392 data: 0.0546 max mem: 9377 +Train: [85] [3100/6250] eta: 0:08:00 lr: 0.000007 grad: 0.1297 (0.1468) loss: 0.8171 (0.8115) time: 0.1394 data: 0.0612 max mem: 9377 +Train: [85] [3200/6250] eta: 0:07:43 lr: 0.000007 grad: 0.1346 (0.1465) loss: 0.8143 (0.8116) time: 0.1435 data: 0.0605 max mem: 9377 +Train: [85] [3300/6250] eta: 0:07:27 lr: 0.000007 grad: 0.1297 (0.1461) loss: 0.8202 (0.8118) time: 0.1582 data: 0.0828 max mem: 9377 +Train: [85] [3400/6250] eta: 0:07:11 lr: 0.000007 grad: 0.1340 (0.1458) loss: 0.8176 (0.8119) time: 0.1350 data: 0.0533 max mem: 9377 +Train: [85] [3500/6250] eta: 0:06:56 lr: 0.000007 grad: 0.1356 (0.1455) loss: 0.8137 (0.8120) time: 0.1474 data: 0.0693 max mem: 9377 +Train: [85] [3600/6250] eta: 0:06:41 lr: 0.000007 grad: 0.1292 (0.1451) loss: 0.8137 (0.8121) time: 0.1595 data: 0.0785 max mem: 9377 +Train: [85] [3700/6250] eta: 0:06:25 lr: 0.000007 grad: 0.1358 (0.1450) loss: 0.8162 (0.8121) time: 0.1659 data: 0.0857 max mem: 9377 +Train: [85] [3800/6250] eta: 0:06:10 lr: 0.000007 grad: 0.1332 (0.1448) loss: 0.8154 (0.8121) time: 0.1478 data: 0.0661 max mem: 9377 +Train: [85] [3900/6250] eta: 0:05:54 lr: 0.000007 grad: 0.1414 (0.1446) loss: 0.8103 (0.8121) time: 0.1388 data: 0.0609 max mem: 9377 +Train: [85] [4000/6250] eta: 0:05:38 lr: 0.000007 grad: 0.1372 (0.1445) loss: 0.8147 (0.8121) time: 0.1560 data: 0.0750 max mem: 9377 +Train: [85] [4100/6250] eta: 0:05:23 lr: 0.000007 grad: 0.1390 (0.1444) loss: 0.8118 (0.8121) time: 0.1336 data: 0.0486 max mem: 9377 +Train: [85] [4200/6250] eta: 0:05:08 lr: 0.000007 grad: 0.1345 (0.1442) loss: 0.8110 (0.8121) time: 0.1410 data: 0.0617 max mem: 9377 +Train: [85] [4300/6250] eta: 0:04:53 lr: 0.000007 grad: 0.1244 (0.1440) loss: 0.8167 (0.8122) time: 0.1467 data: 0.0629 max mem: 9377 +Train: [85] [4400/6250] eta: 0:04:37 lr: 0.000007 grad: 0.1347 (0.1438) loss: 0.8163 (0.8123) time: 0.1494 data: 0.0636 max mem: 9377 +Train: [85] [4500/6250] eta: 0:04:22 lr: 0.000007 grad: 0.1380 (0.1437) loss: 0.8155 (0.8123) time: 0.1542 data: 0.0740 max mem: 9377 +Train: [85] [4600/6250] eta: 0:04:08 lr: 0.000007 grad: 0.1380 (0.1436) loss: 0.8195 (0.8123) time: 0.2462 data: 0.1749 max mem: 9377 +Train: [85] [4700/6250] eta: 0:03:53 lr: 0.000007 grad: 0.1377 (0.1434) loss: 0.8124 (0.8124) time: 0.1302 data: 0.0422 max mem: 9377 +Train: [85] [4800/6250] eta: 0:03:38 lr: 0.000007 grad: 0.1280 (0.1433) loss: 0.8164 (0.8124) time: 0.1591 data: 0.0810 max mem: 9377 +Train: [85] [4900/6250] eta: 0:03:23 lr: 0.000007 grad: 0.1263 (0.1431) loss: 0.8183 (0.8125) time: 0.1519 data: 0.0675 max mem: 9377 +Train: [85] [5000/6250] eta: 0:03:08 lr: 0.000007 grad: 0.1306 (0.1430) loss: 0.8164 (0.8125) time: 0.1445 data: 0.0634 max mem: 9377 +Train: [85] [5100/6250] eta: 0:02:53 lr: 0.000007 grad: 0.1282 (0.1428) loss: 0.8151 (0.8126) time: 0.1353 data: 0.0511 max mem: 9377 +Train: [85] [5200/6250] eta: 0:02:38 lr: 0.000007 grad: 0.1289 (0.1427) loss: 0.8137 (0.8126) time: 0.1418 data: 0.0593 max mem: 9377 +Train: [85] [5300/6250] eta: 0:02:22 lr: 0.000007 grad: 0.1323 (0.1426) loss: 0.8145 (0.8127) time: 0.1441 data: 0.0624 max mem: 9377 +Train: [85] [5400/6250] eta: 0:02:07 lr: 0.000007 grad: 0.1319 (0.1425) loss: 0.8105 (0.8127) time: 0.1475 data: 0.0654 max mem: 9377 +Train: [85] [5500/6250] eta: 0:01:52 lr: 0.000007 grad: 0.1384 (0.1425) loss: 0.8171 (0.8127) time: 0.1336 data: 0.0472 max mem: 9377 +Train: [85] [5600/6250] eta: 0:01:37 lr: 0.000007 grad: 0.1433 (0.1424) loss: 0.8122 (0.8126) time: 0.1385 data: 0.0569 max mem: 9377 +Train: [85] [5700/6250] eta: 0:01:22 lr: 0.000007 grad: 0.1325 (0.1424) loss: 0.8072 (0.8126) time: 0.1412 data: 0.0555 max mem: 9377 +Train: [85] [5800/6250] eta: 0:01:07 lr: 0.000007 grad: 0.1389 (0.1424) loss: 0.8141 (0.8125) time: 0.1621 data: 0.0771 max mem: 9377 +Train: [85] [5900/6250] eta: 0:00:52 lr: 0.000007 grad: 0.1339 (0.1422) loss: 0.8163 (0.8125) time: 0.1482 data: 0.0649 max mem: 9377 +Train: [85] [6000/6250] eta: 0:00:37 lr: 0.000007 grad: 0.1376 (0.1422) loss: 0.8051 (0.8125) time: 0.1351 data: 0.0528 max mem: 9377 +Train: [85] [6100/6250] eta: 0:00:22 lr: 0.000007 grad: 0.1321 (0.1421) loss: 0.8089 (0.8125) time: 0.1425 data: 0.0551 max mem: 9377 +Train: [85] [6200/6250] eta: 0:00:07 lr: 0.000007 grad: 0.1303 (0.1420) loss: 0.8133 (0.8125) time: 0.1387 data: 0.0535 max mem: 9377 +Train: [85] [6249/6250] eta: 0:00:00 lr: 0.000007 grad: 0.1338 (0.1420) loss: 0.8077 (0.8125) time: 0.1406 data: 0.0571 max mem: 9377 +Train: [85] Total time: 0:15:34 (0.1495 s / it) +Averaged stats: lr: 0.000007 grad: 0.1338 (0.1420) loss: 0.8077 (0.8125) +Eval (hcp-train-subset): [85] [ 0/62] eta: 0:04:55 loss: 0.8194 (0.8194) time: 4.7724 data: 4.6868 max mem: 9377 +Eval (hcp-train-subset): [85] [61/62] eta: 0:00:00 loss: 0.8102 (0.8125) time: 0.1431 data: 0.1179 max mem: 9377 +Eval (hcp-train-subset): [85] Total time: 0:00:14 (0.2370 s / it) +Averaged stats (hcp-train-subset): loss: 0.8102 (0.8125) +Eval (hcp-val): [85] [ 0/62] eta: 0:05:30 loss: 0.8290 (0.8290) time: 5.3359 data: 5.3053 max mem: 9377 +Eval (hcp-val): [85] [61/62] eta: 0:00:00 loss: 0.8285 (0.8303) time: 0.1451 data: 0.1198 max mem: 9377 +Eval (hcp-val): [85] Total time: 0:00:14 (0.2313 s / it) +Averaged stats (hcp-val): loss: 0.8285 (0.8303) +Eval (nsd-val): [85] [ 0/62] eta: 0:04:05 loss: 0.8053 (0.8053) time: 3.9671 data: 3.8871 max mem: 9377 +Eval (nsd-val): [85] [61/62] eta: 0:00:00 loss: 0.8117 (0.8151) time: 0.1333 data: 0.1081 max mem: 9377 +Eval (nsd-val): [85] Total time: 0:00:14 (0.2318 s / it) +Averaged stats (nsd-val): loss: 0.8117 (0.8151) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +Train: [86] [ 0/6250] eta: 10:10:26 lr: 0.000007 grad: 0.3975 (0.3975) loss: 0.7998 (0.7998) time: 5.8603 data: 5.6486 max mem: 9377 +Train: [86] [ 100/6250] eta: 0:21:30 lr: 0.000007 grad: 0.1401 (0.1721) loss: 0.8242 (0.8150) time: 0.1562 data: 0.0491 max mem: 9377 +Train: [86] [ 200/6250] eta: 0:18:43 lr: 0.000007 grad: 0.1482 (0.1628) loss: 0.7995 (0.8129) time: 0.1697 data: 0.0775 max mem: 9377 +Train: [86] [ 300/6250] eta: 0:17:49 lr: 0.000007 grad: 0.1425 (0.1576) loss: 0.8069 (0.8125) time: 0.1381 data: 0.0452 max mem: 9377 +Train: [86] [ 400/6250] eta: 0:16:49 lr: 0.000007 grad: 0.1287 (0.1531) loss: 0.8113 (0.8127) time: 0.1356 data: 0.0460 max mem: 9377 +Train: [86] [ 500/6250] eta: 0:16:03 lr: 0.000007 grad: 0.1444 (0.1511) loss: 0.8138 (0.8132) time: 0.1379 data: 0.0567 max mem: 9377 +Train: [86] [ 600/6250] eta: 0:15:19 lr: 0.000006 grad: 0.1399 (0.1508) loss: 0.8127 (0.8132) time: 0.1318 data: 0.0362 max mem: 9377 +Train: [86] [ 700/6250] eta: 0:14:51 lr: 0.000006 grad: 0.1280 (0.1501) loss: 0.8184 (0.8132) time: 0.1475 data: 0.0682 max mem: 9377 +Train: [86] [ 800/6250] eta: 0:14:20 lr: 0.000006 grad: 0.1402 (0.1495) loss: 0.8099 (0.8133) time: 0.1305 data: 0.0425 max mem: 9377 +Train: [86] [ 900/6250] eta: 0:13:53 lr: 0.000006 grad: 0.1422 (0.1488) loss: 0.8139 (0.8134) time: 0.1276 data: 0.0336 max mem: 9377 +Train: [86] [1000/6250] eta: 0:13:28 lr: 0.000006 grad: 0.1349 (0.1482) loss: 0.8119 (0.8134) time: 0.1334 data: 0.0496 max mem: 9377 +Train: [86] [1100/6250] eta: 0:13:06 lr: 0.000006 grad: 0.1389 (0.1479) loss: 0.8095 (0.8131) time: 0.1506 data: 0.0627 max mem: 9377 +Train: [86] [1200/6250] eta: 0:12:41 lr: 0.000006 grad: 0.1399 (0.1472) loss: 0.8214 (0.8133) time: 0.1355 data: 0.0391 max mem: 9377 +Train: [86] [1300/6250] eta: 0:12:22 lr: 0.000006 grad: 0.1391 (0.1467) loss: 0.8167 (0.8133) time: 0.1324 data: 0.0408 max mem: 9377 +Train: [86] [1400/6250] eta: 0:12:04 lr: 0.000006 grad: 0.1372 (0.1467) loss: 0.8080 (0.8132) time: 0.1458 data: 0.0614 max mem: 9377 +Train: [86] [1500/6250] eta: 0:11:46 lr: 0.000006 grad: 0.1445 (0.1462) loss: 0.8036 (0.8130) time: 0.1366 data: 0.0481 max mem: 9377 +Train: [86] [1600/6250] eta: 0:11:28 lr: 0.000006 grad: 0.1422 (0.1460) loss: 0.8081 (0.8128) time: 0.1220 data: 0.0416 max mem: 9377 +Train: [86] [1700/6250] eta: 0:11:14 lr: 0.000006 grad: 0.1397 (0.1458) loss: 0.8107 (0.8126) time: 0.1741 data: 0.0986 max mem: 9377 +Train: [86] [1800/6250] eta: 0:10:57 lr: 0.000006 grad: 0.1400 (0.1458) loss: 0.8105 (0.8124) time: 0.1243 data: 0.0414 max mem: 9377 +Train: [86] [1900/6250] eta: 0:10:41 lr: 0.000006 grad: 0.1402 (0.1457) loss: 0.8084 (0.8121) time: 0.1474 data: 0.0608 max mem: 9377 +Train: [86] [2000/6250] eta: 0:10:27 lr: 0.000006 grad: 0.1405 (0.1460) loss: 0.7966 (0.8119) time: 0.1715 data: 0.0884 max mem: 9377 +Train: [86] [2100/6250] eta: 0:10:10 lr: 0.000006 grad: 0.1327 (0.1458) loss: 0.8127 (0.8117) time: 0.1418 data: 0.0564 max mem: 9377 +Train: [86] [2200/6250] eta: 0:09:54 lr: 0.000006 grad: 0.1470 (0.1458) loss: 0.8055 (0.8115) time: 0.1543 data: 0.0733 max mem: 9377 +Train: [86] [2300/6250] eta: 0:09:38 lr: 0.000006 grad: 0.1372 (0.1457) loss: 0.8074 (0.8113) time: 0.1282 data: 0.0490 max mem: 9377 +Train: [86] [2400/6250] eta: 0:09:22 lr: 0.000006 grad: 0.1445 (0.1455) loss: 0.8062 (0.8112) time: 0.1458 data: 0.0686 max mem: 9377 +Train: [86] [2500/6250] eta: 0:09:06 lr: 0.000006 grad: 0.1405 (0.1453) loss: 0.8109 (0.8110) time: 0.1455 data: 0.0639 max mem: 9377 +Train: [86] [2600/6250] eta: 0:08:51 lr: 0.000006 grad: 0.1347 (0.1452) loss: 0.8112 (0.8110) time: 0.1337 data: 0.0530 max mem: 9377 +Train: [86] [2700/6250] eta: 0:08:35 lr: 0.000006 grad: 0.1378 (0.1451) loss: 0.8074 (0.8109) time: 0.1096 data: 0.0284 max mem: 9377 +Train: [86] [2800/6250] eta: 0:08:20 lr: 0.000006 grad: 0.1501 (0.1451) loss: 0.8097 (0.8108) time: 0.1246 data: 0.0371 max mem: 9377 +Train: [86] [2900/6250] eta: 0:08:06 lr: 0.000006 grad: 0.1442 (0.1451) loss: 0.8079 (0.8107) time: 0.1345 data: 0.0508 max mem: 9377 +Train: [86] [3000/6250] eta: 0:07:51 lr: 0.000006 grad: 0.1440 (0.1451) loss: 0.8061 (0.8106) time: 0.1251 data: 0.0341 max mem: 9377 +Train: [86] [3100/6250] eta: 0:07:36 lr: 0.000006 grad: 0.1373 (0.1450) loss: 0.8112 (0.8106) time: 0.1531 data: 0.0644 max mem: 9377 +Train: [86] [3200/6250] eta: 0:07:22 lr: 0.000006 grad: 0.1406 (0.1450) loss: 0.8113 (0.8106) time: 0.1494 data: 0.0673 max mem: 9377 +Train: [86] [3300/6250] eta: 0:07:07 lr: 0.000006 grad: 0.1270 (0.1449) loss: 0.8121 (0.8105) time: 0.1451 data: 0.0587 max mem: 9377 +Train: [86] [3400/6250] eta: 0:06:53 lr: 0.000006 grad: 0.1406 (0.1448) loss: 0.8100 (0.8105) time: 0.1419 data: 0.0574 max mem: 9377 +Train: [86] [3500/6250] eta: 0:06:38 lr: 0.000006 grad: 0.1407 (0.1448) loss: 0.8026 (0.8104) time: 0.1580 data: 0.0738 max mem: 9377 +Train: [86] [3600/6250] eta: 0:06:23 lr: 0.000006 grad: 0.1300 (0.1447) loss: 0.8122 (0.8104) time: 0.1273 data: 0.0442 max mem: 9377 +Train: [86] [3700/6250] eta: 0:06:08 lr: 0.000006 grad: 0.1411 (0.1448) loss: 0.8074 (0.8103) time: 0.1298 data: 0.0456 max mem: 9377 +Train: [86] [3800/6250] eta: 0:05:55 lr: 0.000006 grad: 0.1403 (0.1447) loss: 0.8089 (0.8102) time: 0.1599 data: 0.0758 max mem: 9377 +Train: [86] [3900/6250] eta: 0:05:40 lr: 0.000006 grad: 0.1354 (0.1446) loss: 0.8041 (0.8102) time: 0.1405 data: 0.0564 max mem: 9377 +Train: [86] [4000/6250] eta: 0:05:25 lr: 0.000006 grad: 0.1403 (0.1446) loss: 0.8078 (0.8101) time: 0.1495 data: 0.0666 max mem: 9377 +Train: [86] [4100/6250] eta: 0:05:11 lr: 0.000006 grad: 0.1400 (0.1445) loss: 0.8063 (0.8100) time: 0.1474 data: 0.0603 max mem: 9377 +Train: [86] [4200/6250] eta: 0:04:57 lr: 0.000006 grad: 0.1359 (0.1444) loss: 0.8106 (0.8101) time: 0.1741 data: 0.0975 max mem: 9377 +Train: [86] [4300/6250] eta: 0:04:43 lr: 0.000006 grad: 0.1305 (0.1444) loss: 0.8109 (0.8101) time: 0.1560 data: 0.0813 max mem: 9377 +Train: [86] [4400/6250] eta: 0:04:28 lr: 0.000006 grad: 0.1393 (0.1444) loss: 0.8126 (0.8100) time: 0.1350 data: 0.0443 max mem: 9377 +Train: [86] [4500/6250] eta: 0:04:14 lr: 0.000006 grad: 0.1504 (0.1444) loss: 0.8074 (0.8100) time: 0.1739 data: 0.0808 max mem: 9377 +Train: [86] [4600/6250] eta: 0:04:00 lr: 0.000006 grad: 0.1360 (0.1443) loss: 0.8050 (0.8100) time: 0.1589 data: 0.0825 max mem: 9377 +Train: [86] [4700/6250] eta: 0:03:46 lr: 0.000006 grad: 0.1408 (0.1442) loss: 0.8143 (0.8100) time: 0.1392 data: 0.0618 max mem: 9377 +Train: [86] [4800/6250] eta: 0:03:31 lr: 0.000006 grad: 0.1381 (0.1441) loss: 0.8070 (0.8100) time: 0.1371 data: 0.0606 max mem: 9377 +Train: [86] [4900/6250] eta: 0:03:17 lr: 0.000006 grad: 0.1426 (0.1441) loss: 0.8170 (0.8101) time: 0.1753 data: 0.0936 max mem: 9377 +Train: [86] [5000/6250] eta: 0:03:03 lr: 0.000006 grad: 0.1398 (0.1440) loss: 0.8141 (0.8101) time: 0.1498 data: 0.0595 max mem: 9377 +Train: [86] [5100/6250] eta: 0:02:49 lr: 0.000006 grad: 0.1402 (0.1440) loss: 0.8092 (0.8101) time: 0.1766 data: 0.0864 max mem: 9377 +Train: [86] [5200/6250] eta: 0:02:34 lr: 0.000006 grad: 0.1363 (0.1440) loss: 0.8166 (0.8101) time: 0.1573 data: 0.0736 max mem: 9377 +Train: [86] [5300/6250] eta: 0:02:19 lr: 0.000006 grad: 0.1367 (0.1440) loss: 0.8069 (0.8100) time: 0.1339 data: 0.0449 max mem: 9377 +Train: [86] [5400/6250] eta: 0:02:05 lr: 0.000006 grad: 0.1380 (0.1439) loss: 0.8089 (0.8100) time: 0.1083 data: 0.0211 max mem: 9377 +Train: [86] [5500/6250] eta: 0:01:50 lr: 0.000006 grad: 0.1491 (0.1439) loss: 0.8036 (0.8100) time: 0.1592 data: 0.0823 max mem: 9377 +Train: [86] [5600/6250] eta: 0:01:35 lr: 0.000006 grad: 0.1363 (0.1439) loss: 0.8106 (0.8100) time: 0.1799 data: 0.0993 max mem: 9377 +Train: [86] [5700/6250] eta: 0:01:20 lr: 0.000006 grad: 0.1356 (0.1438) loss: 0.8160 (0.8100) time: 0.1465 data: 0.0599 max mem: 9377 +Train: [86] [5800/6250] eta: 0:01:06 lr: 0.000006 grad: 0.1383 (0.1437) loss: 0.8154 (0.8101) time: 0.1608 data: 0.0868 max mem: 9377 +Train: [86] [5900/6250] eta: 0:00:51 lr: 0.000006 grad: 0.1290 (0.1436) loss: 0.8113 (0.8101) time: 0.1853 data: 0.1105 max mem: 9377 +Train: [86] [6000/6250] eta: 0:00:37 lr: 0.000006 grad: 0.1350 (0.1435) loss: 0.8106 (0.8102) time: 0.1804 data: 0.1021 max mem: 9377 +Train: [86] [6100/6250] eta: 0:00:22 lr: 0.000006 grad: 0.1388 (0.1435) loss: 0.8017 (0.8102) time: 0.1546 data: 0.0709 max mem: 9377 +Train: [86] [6200/6250] eta: 0:00:07 lr: 0.000006 grad: 0.1386 (0.1434) loss: 0.8060 (0.8102) time: 0.1411 data: 0.0574 max mem: 9377 +Train: [86] [6249/6250] eta: 0:00:00 lr: 0.000006 grad: 0.1359 (0.1434) loss: 0.8115 (0.8101) time: 0.1712 data: 0.0899 max mem: 9377 +Train: [86] Total time: 0:15:33 (0.1494 s / it) +Averaged stats: lr: 0.000006 grad: 0.1359 (0.1434) loss: 0.8115 (0.8101) +Eval (hcp-train-subset): [86] [ 0/62] eta: 0:05:11 loss: 0.8201 (0.8201) time: 5.0264 data: 4.9946 max mem: 9377 +Eval (hcp-train-subset): [86] [61/62] eta: 0:00:00 loss: 0.8098 (0.8116) time: 0.1409 data: 0.1139 max mem: 9377 +Eval (hcp-train-subset): [86] Total time: 0:00:14 (0.2415 s / it) +Averaged stats (hcp-train-subset): loss: 0.8098 (0.8116) +Eval (hcp-val): [86] [ 0/62] eta: 0:04:08 loss: 0.8271 (0.8271) time: 4.0045 data: 3.9318 max mem: 9377 +Eval (hcp-val): [86] [61/62] eta: 0:00:00 loss: 0.8277 (0.8298) time: 0.1225 data: 0.0972 max mem: 9377 +Eval (hcp-val): [86] Total time: 0:00:13 (0.2230 s / it) +Averaged stats (hcp-val): loss: 0.8277 (0.8298) +Eval (nsd-val): [86] [ 0/62] eta: 0:04:04 loss: 0.8026 (0.8026) time: 3.9390 data: 3.8839 max mem: 9377 +Eval (nsd-val): [86] [61/62] eta: 0:00:00 loss: 0.8117 (0.8137) time: 0.1515 data: 0.1247 max mem: 9377 +Eval (nsd-val): [86] Total time: 0:00:14 (0.2267 s / it) +Averaged stats (nsd-val): loss: 0.8117 (0.8137) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +Train: [87] [ 0/6250] eta: 8:27:12 lr: 0.000006 grad: 0.0897 (0.0897) loss: 0.8616 (0.8616) time: 4.8692 data: 4.5714 max mem: 9377 +Train: [87] [ 100/6250] eta: 0:23:03 lr: 0.000006 grad: 0.1687 (0.1830) loss: 0.8238 (0.8180) time: 0.1640 data: 0.0646 max mem: 9377 +Train: [87] [ 200/6250] eta: 0:20:28 lr: 0.000006 grad: 0.1385 (0.1678) loss: 0.8227 (0.8182) time: 0.1682 data: 0.0646 max mem: 9377 +Train: [87] [ 300/6250] eta: 0:18:57 lr: 0.000006 grad: 0.1363 (0.1615) loss: 0.8286 (0.8184) time: 0.1699 data: 0.0699 max mem: 9377 +Train: [87] [ 400/6250] eta: 0:17:45 lr: 0.000006 grad: 0.1422 (0.1582) loss: 0.8184 (0.8179) time: 0.1538 data: 0.0417 max mem: 9377 +Train: [87] [ 500/6250] eta: 0:16:58 lr: 0.000006 grad: 0.1402 (0.1556) loss: 0.8079 (0.8171) time: 0.1391 data: 0.0496 max mem: 9377 +Train: [87] [ 600/6250] eta: 0:16:23 lr: 0.000006 grad: 0.1490 (0.1553) loss: 0.8068 (0.8159) time: 0.1397 data: 0.0377 max mem: 9377 +Train: [87] [ 700/6250] eta: 0:15:57 lr: 0.000006 grad: 0.1441 (0.1542) loss: 0.8138 (0.8153) time: 0.1698 data: 0.0747 max mem: 9377 +Train: [87] [ 800/6250] eta: 0:15:34 lr: 0.000006 grad: 0.1519 (0.1535) loss: 0.8075 (0.8151) time: 0.1458 data: 0.0523 max mem: 9377 +Train: [87] [ 900/6250] eta: 0:15:07 lr: 0.000006 grad: 0.1407 (0.1526) loss: 0.8142 (0.8149) time: 0.1492 data: 0.0572 max mem: 9377 +Train: [87] [1000/6250] eta: 0:14:42 lr: 0.000006 grad: 0.1457 (0.1517) loss: 0.8118 (0.8147) time: 0.1641 data: 0.0742 max mem: 9377 +Train: [87] [1100/6250] eta: 0:14:23 lr: 0.000006 grad: 0.1398 (0.1514) loss: 0.8098 (0.8143) time: 0.1700 data: 0.0931 max mem: 9377 +Train: [87] [1200/6250] eta: 0:14:08 lr: 0.000006 grad: 0.1444 (0.1512) loss: 0.8101 (0.8139) time: 0.1493 data: 0.0671 max mem: 9377 +Train: [87] [1300/6250] eta: 0:13:48 lr: 0.000006 grad: 0.1407 (0.1509) loss: 0.8104 (0.8134) time: 0.1508 data: 0.0667 max mem: 9377 +Train: [87] [1400/6250] eta: 0:13:33 lr: 0.000005 grad: 0.1438 (0.1506) loss: 0.8058 (0.8131) time: 0.1636 data: 0.0884 max mem: 9377 +Train: [87] [1500/6250] eta: 0:13:21 lr: 0.000005 grad: 0.1361 (0.1501) loss: 0.8151 (0.8130) time: 0.1813 data: 0.1022 max mem: 9377 +Train: [87] [1600/6250] eta: 0:12:58 lr: 0.000005 grad: 0.1382 (0.1496) loss: 0.8067 (0.8128) time: 0.1328 data: 0.0497 max mem: 9377 +Train: [87] [1700/6250] eta: 0:12:32 lr: 0.000005 grad: 0.1414 (0.1495) loss: 0.8128 (0.8125) time: 0.1352 data: 0.0483 max mem: 9377 +Train: [87] [1800/6250] eta: 0:12:09 lr: 0.000005 grad: 0.1430 (0.1493) loss: 0.8024 (0.8124) time: 0.1487 data: 0.0682 max mem: 9377 +Train: [87] [1900/6250] eta: 0:11:47 lr: 0.000005 grad: 0.1455 (0.1490) loss: 0.8075 (0.8121) time: 0.1442 data: 0.0627 max mem: 9377 +Train: [87] [2000/6250] eta: 0:11:27 lr: 0.000005 grad: 0.1351 (0.1489) loss: 0.8120 (0.8118) time: 0.1473 data: 0.0592 max mem: 9377 +Train: [87] [2100/6250] eta: 0:11:07 lr: 0.000005 grad: 0.1491 (0.1488) loss: 0.7989 (0.8116) time: 0.1417 data: 0.0595 max mem: 9377 +Train: [87] [2200/6250] eta: 0:10:48 lr: 0.000005 grad: 0.1408 (0.1485) loss: 0.8078 (0.8114) time: 0.1301 data: 0.0412 max mem: 9377 +Train: [87] [2300/6250] eta: 0:10:29 lr: 0.000005 grad: 0.1329 (0.1482) loss: 0.8108 (0.8114) time: 0.1459 data: 0.0678 max mem: 9377 +Train: [87] [2400/6250] eta: 0:10:11 lr: 0.000005 grad: 0.1345 (0.1479) loss: 0.8184 (0.8113) time: 0.1393 data: 0.0572 max mem: 9377 +Train: [87] [2500/6250] eta: 0:09:53 lr: 0.000005 grad: 0.1360 (0.1476) loss: 0.8109 (0.8113) time: 0.1611 data: 0.0809 max mem: 9377 +Train: [87] [2600/6250] eta: 0:09:35 lr: 0.000005 grad: 0.1428 (0.1475) loss: 0.8123 (0.8113) time: 0.1417 data: 0.0659 max mem: 9377 +Train: [87] [2700/6250] eta: 0:09:18 lr: 0.000005 grad: 0.1446 (0.1472) loss: 0.8098 (0.8113) time: 0.1643 data: 0.0754 max mem: 9377 +Train: [87] [2800/6250] eta: 0:09:00 lr: 0.000005 grad: 0.1366 (0.1472) loss: 0.8136 (0.8112) time: 0.1142 data: 0.0375 max mem: 9377 +Train: [87] [2900/6250] eta: 0:08:43 lr: 0.000005 grad: 0.1461 (0.1471) loss: 0.8056 (0.8112) time: 0.1421 data: 0.0629 max mem: 9377 +Train: [87] [3000/6250] eta: 0:08:26 lr: 0.000005 grad: 0.1390 (0.1470) loss: 0.8099 (0.8112) time: 0.1461 data: 0.0711 max mem: 9377 +Train: [87] [3100/6250] eta: 0:08:09 lr: 0.000005 grad: 0.1475 (0.1470) loss: 0.8064 (0.8111) time: 0.1583 data: 0.0770 max mem: 9377 +Train: [87] [3200/6250] eta: 0:07:52 lr: 0.000005 grad: 0.1398 (0.1470) loss: 0.8161 (0.8111) time: 0.1396 data: 0.0552 max mem: 9377 +Train: [87] [3300/6250] eta: 0:07:35 lr: 0.000005 grad: 0.1400 (0.1470) loss: 0.8133 (0.8110) time: 0.1196 data: 0.0256 max mem: 9377 +Train: [87] [3400/6250] eta: 0:07:18 lr: 0.000005 grad: 0.1366 (0.1468) loss: 0.8136 (0.8109) time: 0.1201 data: 0.0356 max mem: 9377 +Train: [87] [3500/6250] eta: 0:07:03 lr: 0.000005 grad: 0.1327 (0.1467) loss: 0.8183 (0.8109) time: 0.1656 data: 0.0839 max mem: 9377 +Train: [87] [3600/6250] eta: 0:06:46 lr: 0.000005 grad: 0.1411 (0.1466) loss: 0.8048 (0.8109) time: 0.1397 data: 0.0651 max mem: 9377 +Train: [87] [3700/6250] eta: 0:06:30 lr: 0.000005 grad: 0.1431 (0.1466) loss: 0.8143 (0.8108) time: 0.1425 data: 0.0564 max mem: 9377 +Train: [87] [3800/6250] eta: 0:06:14 lr: 0.000005 grad: 0.1434 (0.1465) loss: 0.8144 (0.8108) time: 0.1406 data: 0.0456 max mem: 9377 +Train: [87] [3900/6250] eta: 0:05:58 lr: 0.000005 grad: 0.1374 (0.1464) loss: 0.8116 (0.8107) time: 0.1416 data: 0.0537 max mem: 9377 +Train: [87] [4000/6250] eta: 0:05:43 lr: 0.000005 grad: 0.1349 (0.1464) loss: 0.8079 (0.8107) time: 0.1394 data: 0.0580 max mem: 9377 +Train: [87] [4100/6250] eta: 0:05:27 lr: 0.000005 grad: 0.1350 (0.1464) loss: 0.8070 (0.8106) time: 0.1367 data: 0.0496 max mem: 9377 +Train: [87] [4200/6250] eta: 0:05:11 lr: 0.000005 grad: 0.1319 (0.1463) loss: 0.8102 (0.8105) time: 0.1426 data: 0.0600 max mem: 9377 +Train: [87] [4300/6250] eta: 0:04:56 lr: 0.000005 grad: 0.1407 (0.1463) loss: 0.8134 (0.8104) time: 0.1261 data: 0.0401 max mem: 9377 +Train: [87] [4400/6250] eta: 0:04:40 lr: 0.000005 grad: 0.1494 (0.1464) loss: 0.8037 (0.8104) time: 0.1464 data: 0.0669 max mem: 9377 +Train: [87] [4500/6250] eta: 0:04:25 lr: 0.000005 grad: 0.1511 (0.1465) loss: 0.8086 (0.8102) time: 0.1938 data: 0.1186 max mem: 9377 +Train: [87] [4600/6250] eta: 0:04:10 lr: 0.000005 grad: 0.1476 (0.1465) loss: 0.8084 (0.8101) time: 0.1237 data: 0.0441 max mem: 9377 +Train: [87] [4700/6250] eta: 0:03:55 lr: 0.000005 grad: 0.1458 (0.1465) loss: 0.8064 (0.8100) time: 0.1606 data: 0.0788 max mem: 9377 +Train: [87] [4800/6250] eta: 0:03:40 lr: 0.000005 grad: 0.1481 (0.1467) loss: 0.8004 (0.8098) time: 0.1611 data: 0.0867 max mem: 9377 +Train: [87] [4900/6250] eta: 0:03:25 lr: 0.000005 grad: 0.1488 (0.1469) loss: 0.8050 (0.8097) time: 0.1366 data: 0.0539 max mem: 9377 +Train: [87] [5000/6250] eta: 0:03:09 lr: 0.000005 grad: 0.1545 (0.1470) loss: 0.8063 (0.8096) time: 0.1448 data: 0.0656 max mem: 9377 +Train: [87] [5100/6250] eta: 0:02:54 lr: 0.000005 grad: 0.1414 (0.1471) loss: 0.8058 (0.8096) time: 0.1382 data: 0.0524 max mem: 9377 +Train: [87] [5200/6250] eta: 0:02:39 lr: 0.000005 grad: 0.1400 (0.1471) loss: 0.8048 (0.8096) time: 0.1566 data: 0.0730 max mem: 9377 +Train: [87] [5300/6250] eta: 0:02:23 lr: 0.000005 grad: 0.1438 (0.1470) loss: 0.8097 (0.8096) time: 0.1434 data: 0.0584 max mem: 9377 +Train: [87] [5400/6250] eta: 0:02:08 lr: 0.000005 grad: 0.1393 (0.1470) loss: 0.8148 (0.8096) time: 0.1342 data: 0.0455 max mem: 9377 +Train: [87] [5500/6250] eta: 0:01:52 lr: 0.000005 grad: 0.1463 (0.1471) loss: 0.8121 (0.8095) time: 0.1229 data: 0.0342 max mem: 9377 +Train: [87] [5600/6250] eta: 0:01:37 lr: 0.000005 grad: 0.1580 (0.1472) loss: 0.8047 (0.8095) time: 0.1403 data: 0.0475 max mem: 9377 +Train: [87] [5700/6250] eta: 0:01:22 lr: 0.000005 grad: 0.1395 (0.1472) loss: 0.8107 (0.8094) time: 0.1481 data: 0.0609 max mem: 9377 +Train: [87] [5800/6250] eta: 0:01:07 lr: 0.000005 grad: 0.1439 (0.1473) loss: 0.8128 (0.8094) time: 0.1550 data: 0.0768 max mem: 9377 +Train: [87] [5900/6250] eta: 0:00:52 lr: 0.000005 grad: 0.1362 (0.1473) loss: 0.8150 (0.8094) time: 0.1216 data: 0.0295 max mem: 9377 +Train: [87] [6000/6250] eta: 0:00:37 lr: 0.000005 grad: 0.1374 (0.1473) loss: 0.8087 (0.8094) time: 0.1293 data: 0.0465 max mem: 9377 +Train: [87] [6100/6250] eta: 0:00:22 lr: 0.000005 grad: 0.1389 (0.1473) loss: 0.8138 (0.8094) time: 0.1448 data: 0.0676 max mem: 9377 +Train: [87] [6200/6250] eta: 0:00:07 lr: 0.000005 grad: 0.1501 (0.1472) loss: 0.8166 (0.8095) time: 0.1600 data: 0.0867 max mem: 9377 +Train: [87] [6249/6250] eta: 0:00:00 lr: 0.000005 grad: 0.1428 (0.1473) loss: 0.8050 (0.8095) time: 0.1428 data: 0.0548 max mem: 9377 +Train: [87] Total time: 0:15:35 (0.1497 s / it) +Averaged stats: lr: 0.000005 grad: 0.1428 (0.1473) loss: 0.8050 (0.8095) +Eval (hcp-train-subset): [87] [ 0/62] eta: 0:05:45 loss: 0.8191 (0.8191) time: 5.5799 data: 5.5469 max mem: 9377 +Eval (hcp-train-subset): [87] [61/62] eta: 0:00:00 loss: 0.8087 (0.8112) time: 0.1385 data: 0.1134 max mem: 9377 +Eval (hcp-train-subset): [87] Total time: 0:00:14 (0.2267 s / it) +Averaged stats (hcp-train-subset): loss: 0.8087 (0.8112) +Eval (hcp-val): [87] [ 0/62] eta: 0:05:41 loss: 0.8252 (0.8252) time: 5.5080 data: 5.4780 max mem: 9377 +Eval (hcp-val): [87] [61/62] eta: 0:00:00 loss: 0.8291 (0.8292) time: 0.1227 data: 0.0972 max mem: 9377 +Eval (hcp-val): [87] Total time: 0:00:13 (0.2217 s / it) +Averaged stats (hcp-val): loss: 0.8291 (0.8292) +Eval (nsd-val): [87] [ 0/62] eta: 0:05:16 loss: 0.8018 (0.8018) time: 5.1062 data: 5.0745 max mem: 9377 +Eval (nsd-val): [87] [61/62] eta: 0:00:00 loss: 0.8109 (0.8130) time: 0.1087 data: 0.0818 max mem: 9377 +Eval (nsd-val): [87] Total time: 0:00:13 (0.2141 s / it) +Averaged stats (nsd-val): loss: 0.8109 (0.8130) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +Train: [88] [ 0/6250] eta: 9:52:57 lr: 0.000005 grad: 0.1665 (0.1665) loss: 0.8338 (0.8338) time: 5.6924 data: 5.4662 max mem: 9377 +Train: [88] [ 100/6250] eta: 0:20:41 lr: 0.000005 grad: 0.1567 (0.1896) loss: 0.8056 (0.8232) time: 0.1604 data: 0.0623 max mem: 9377 +Train: [88] [ 200/6250] eta: 0:18:32 lr: 0.000005 grad: 0.1667 (0.1836) loss: 0.8120 (0.8162) time: 0.1437 data: 0.0540 max mem: 9377 +Train: [88] [ 300/6250] eta: 0:17:24 lr: 0.000005 grad: 0.1466 (0.1746) loss: 0.8111 (0.8146) time: 0.1484 data: 0.0605 max mem: 9377 +Train: [88] [ 400/6250] eta: 0:16:25 lr: 0.000005 grad: 0.1462 (0.1683) loss: 0.8112 (0.8138) time: 0.1376 data: 0.0461 max mem: 9377 +Train: [88] [ 500/6250] eta: 0:15:45 lr: 0.000005 grad: 0.1475 (0.1657) loss: 0.8054 (0.8124) time: 0.1469 data: 0.0553 max mem: 9377 +Train: [88] [ 600/6250] eta: 0:15:11 lr: 0.000005 grad: 0.1452 (0.1640) loss: 0.8106 (0.8111) time: 0.1487 data: 0.0598 max mem: 9377 +Train: [88] [ 700/6250] eta: 0:14:43 lr: 0.000005 grad: 0.1469 (0.1620) loss: 0.8136 (0.8104) time: 0.1505 data: 0.0639 max mem: 9377 +Train: [88] [ 800/6250] eta: 0:14:17 lr: 0.000005 grad: 0.1451 (0.1603) loss: 0.8049 (0.8100) time: 0.1602 data: 0.0639 max mem: 9377 +Train: [88] [ 900/6250] eta: 0:13:47 lr: 0.000005 grad: 0.1423 (0.1595) loss: 0.8128 (0.8100) time: 0.1449 data: 0.0524 max mem: 9377 +Train: [88] [1000/6250] eta: 0:13:21 lr: 0.000005 grad: 0.1455 (0.1591) loss: 0.8120 (0.8098) time: 0.1256 data: 0.0411 max mem: 9377 +Train: [88] [1100/6250] eta: 0:13:02 lr: 0.000005 grad: 0.1522 (0.1589) loss: 0.8112 (0.8095) time: 0.1608 data: 0.0759 max mem: 9377 +Train: [88] [1200/6250] eta: 0:12:42 lr: 0.000005 grad: 0.1480 (0.1583) loss: 0.8044 (0.8092) time: 0.1278 data: 0.0339 max mem: 9377 +Train: [88] [1300/6250] eta: 0:12:25 lr: 0.000005 grad: 0.1408 (0.1578) loss: 0.8115 (0.8090) time: 0.1588 data: 0.0712 max mem: 9377 +Train: [88] [1400/6250] eta: 0:12:07 lr: 0.000005 grad: 0.1384 (0.1571) loss: 0.8149 (0.8091) time: 0.1397 data: 0.0527 max mem: 9377 +Train: [88] [1500/6250] eta: 0:11:49 lr: 0.000005 grad: 0.1446 (0.1566) loss: 0.8036 (0.8090) time: 0.1191 data: 0.0402 max mem: 9377 +Train: [88] [1600/6250] eta: 0:11:32 lr: 0.000005 grad: 0.1511 (0.1565) loss: 0.8013 (0.8087) time: 0.1469 data: 0.0658 max mem: 9377 +Train: [88] [1700/6250] eta: 0:11:12 lr: 0.000005 grad: 0.1517 (0.1562) loss: 0.8123 (0.8088) time: 0.1414 data: 0.0610 max mem: 9377 +Train: [88] [1800/6250] eta: 0:10:56 lr: 0.000005 grad: 0.1451 (0.1557) loss: 0.8159 (0.8088) time: 0.1511 data: 0.0712 max mem: 9377 +Train: [88] [1900/6250] eta: 0:10:40 lr: 0.000005 grad: 0.1516 (0.1553) loss: 0.8045 (0.8088) time: 0.1514 data: 0.0652 max mem: 9377 +Train: [88] [2000/6250] eta: 0:10:24 lr: 0.000005 grad: 0.1491 (0.1548) loss: 0.8042 (0.8088) time: 0.1296 data: 0.0452 max mem: 9377 +Train: [88] [2100/6250] eta: 0:10:09 lr: 0.000005 grad: 0.1329 (0.1542) loss: 0.8144 (0.8090) time: 0.1250 data: 0.0391 max mem: 9377 +Train: [88] [2200/6250] eta: 0:09:54 lr: 0.000005 grad: 0.1375 (0.1536) loss: 0.8178 (0.8092) time: 0.1546 data: 0.0748 max mem: 9377 +Train: [88] [2300/6250] eta: 0:09:41 lr: 0.000005 grad: 0.1451 (0.1533) loss: 0.8043 (0.8092) time: 0.1399 data: 0.0542 max mem: 9377 +Train: [88] [2400/6250] eta: 0:09:26 lr: 0.000005 grad: 0.1523 (0.1531) loss: 0.8091 (0.8092) time: 0.1269 data: 0.0455 max mem: 9377 +Train: [88] [2500/6250] eta: 0:09:12 lr: 0.000005 grad: 0.1384 (0.1528) loss: 0.8134 (0.8094) time: 0.1423 data: 0.0643 max mem: 9377 +Train: [88] [2600/6250] eta: 0:08:58 lr: 0.000005 grad: 0.1456 (0.1525) loss: 0.8126 (0.8095) time: 0.1564 data: 0.0759 max mem: 9377 +Train: [88] [2700/6250] eta: 0:08:43 lr: 0.000005 grad: 0.1359 (0.1524) loss: 0.8195 (0.8096) time: 0.1469 data: 0.0664 max mem: 9377 +Train: [88] [2800/6250] eta: 0:08:27 lr: 0.000005 grad: 0.1362 (0.1521) loss: 0.8106 (0.8097) time: 0.1115 data: 0.0194 max mem: 9377 +Train: [88] [2900/6250] eta: 0:08:12 lr: 0.000004 grad: 0.1420 (0.1518) loss: 0.8140 (0.8098) time: 0.1388 data: 0.0548 max mem: 9377 +Train: [88] [3000/6250] eta: 0:07:57 lr: 0.000004 grad: 0.1427 (0.1518) loss: 0.8090 (0.8097) time: 0.1533 data: 0.0757 max mem: 9377 +Train: [88] [3100/6250] eta: 0:07:42 lr: 0.000004 grad: 0.1411 (0.1517) loss: 0.8099 (0.8097) time: 0.1419 data: 0.0575 max mem: 9377 +Train: [88] [3200/6250] eta: 0:07:27 lr: 0.000004 grad: 0.1524 (0.1517) loss: 0.8067 (0.8096) time: 0.1226 data: 0.0374 max mem: 9377 +Train: [88] [3300/6250] eta: 0:07:12 lr: 0.000004 grad: 0.1415 (0.1515) loss: 0.8090 (0.8096) time: 0.1426 data: 0.0558 max mem: 9377 +Train: [88] [3400/6250] eta: 0:06:57 lr: 0.000004 grad: 0.1488 (0.1514) loss: 0.8067 (0.8096) time: 0.1405 data: 0.0512 max mem: 9377 +Train: [88] [3500/6250] eta: 0:06:43 lr: 0.000004 grad: 0.1400 (0.1513) loss: 0.8124 (0.8095) time: 0.1431 data: 0.0596 max mem: 9377 +Train: [88] [3600/6250] eta: 0:06:27 lr: 0.000004 grad: 0.1456 (0.1512) loss: 0.8015 (0.8094) time: 0.1127 data: 0.0220 max mem: 9377 +Train: [88] [3700/6250] eta: 0:06:12 lr: 0.000004 grad: 0.1338 (0.1512) loss: 0.8124 (0.8093) time: 0.1339 data: 0.0562 max mem: 9377 +Train: [88] [3800/6250] eta: 0:05:57 lr: 0.000004 grad: 0.1458 (0.1512) loss: 0.8069 (0.8092) time: 0.1405 data: 0.0565 max mem: 9377 +Train: [88] [3900/6250] eta: 0:05:42 lr: 0.000004 grad: 0.1513 (0.1512) loss: 0.8079 (0.8091) time: 0.1417 data: 0.0562 max mem: 9377 +Train: [88] [4000/6250] eta: 0:05:28 lr: 0.000004 grad: 0.1395 (0.1512) loss: 0.8103 (0.8090) time: 0.1324 data: 0.0500 max mem: 9377 +Train: [88] [4100/6250] eta: 0:05:13 lr: 0.000004 grad: 0.1523 (0.1512) loss: 0.8104 (0.8089) time: 0.1321 data: 0.0363 max mem: 9377 +Train: [88] [4200/6250] eta: 0:04:58 lr: 0.000004 grad: 0.1452 (0.1511) loss: 0.8091 (0.8089) time: 0.1570 data: 0.0688 max mem: 9377 +Train: [88] [4300/6250] eta: 0:04:43 lr: 0.000004 grad: 0.1484 (0.1510) loss: 0.8126 (0.8088) time: 0.1543 data: 0.0710 max mem: 9377 +Train: [88] [4400/6250] eta: 0:04:30 lr: 0.000004 grad: 0.1433 (0.1510) loss: 0.8103 (0.8087) time: 0.1684 data: 0.0808 max mem: 9377 +Train: [88] [4500/6250] eta: 0:04:15 lr: 0.000004 grad: 0.1346 (0.1509) loss: 0.8106 (0.8087) time: 0.1397 data: 0.0537 max mem: 9377 +Train: [88] [4600/6250] eta: 0:04:01 lr: 0.000004 grad: 0.1471 (0.1508) loss: 0.8057 (0.8086) time: 0.1536 data: 0.0706 max mem: 9377 +Train: [88] [4700/6250] eta: 0:03:47 lr: 0.000004 grad: 0.1426 (0.1508) loss: 0.8017 (0.8085) time: 0.1633 data: 0.0799 max mem: 9377 +Train: [88] [4800/6250] eta: 0:03:32 lr: 0.000004 grad: 0.1483 (0.1508) loss: 0.7967 (0.8085) time: 0.1335 data: 0.0445 max mem: 9377 +Train: [88] [4900/6250] eta: 0:03:17 lr: 0.000004 grad: 0.1428 (0.1508) loss: 0.8027 (0.8084) time: 0.1972 data: 0.1170 max mem: 9377 +Train: [88] [5000/6250] eta: 0:03:02 lr: 0.000004 grad: 0.1458 (0.1507) loss: 0.8090 (0.8084) time: 0.1381 data: 0.0472 max mem: 9377 +Train: [88] [5100/6250] eta: 0:02:47 lr: 0.000004 grad: 0.1397 (0.1507) loss: 0.8068 (0.8083) time: 0.1409 data: 0.0634 max mem: 9377 +Train: [88] [5200/6250] eta: 0:02:33 lr: 0.000004 grad: 0.1450 (0.1506) loss: 0.8088 (0.8083) time: 0.1437 data: 0.0622 max mem: 9377 +Train: [88] [5300/6250] eta: 0:02:18 lr: 0.000004 grad: 0.1453 (0.1507) loss: 0.8061 (0.8083) time: 0.1485 data: 0.0629 max mem: 9377 +Train: [88] [5400/6250] eta: 0:02:03 lr: 0.000004 grad: 0.1397 (0.1506) loss: 0.8096 (0.8082) time: 0.1422 data: 0.0616 max mem: 9377 +Train: [88] [5500/6250] eta: 0:01:49 lr: 0.000004 grad: 0.1496 (0.1505) loss: 0.8019 (0.8081) time: 0.1397 data: 0.0592 max mem: 9377 +Train: [88] [5600/6250] eta: 0:01:34 lr: 0.000004 grad: 0.1457 (0.1504) loss: 0.8040 (0.8081) time: 0.1704 data: 0.0877 max mem: 9377 +Train: [88] [5700/6250] eta: 0:01:20 lr: 0.000004 grad: 0.1473 (0.1503) loss: 0.8103 (0.8081) time: 0.1454 data: 0.0633 max mem: 9377 +Train: [88] [5800/6250] eta: 0:01:05 lr: 0.000004 grad: 0.1432 (0.1503) loss: 0.8029 (0.8080) time: 0.1445 data: 0.0680 max mem: 9377 +Train: [88] [5900/6250] eta: 0:00:51 lr: 0.000004 grad: 0.1416 (0.1503) loss: 0.8091 (0.8079) time: 0.1429 data: 0.0658 max mem: 9377 +Train: [88] [6000/6250] eta: 0:00:36 lr: 0.000004 grad: 0.1473 (0.1502) loss: 0.8023 (0.8079) time: 0.1315 data: 0.0471 max mem: 9377 +Train: [88] [6100/6250] eta: 0:00:21 lr: 0.000004 grad: 0.1538 (0.1502) loss: 0.8088 (0.8078) time: 0.1418 data: 0.0633 max mem: 9377 +Train: [88] [6200/6250] eta: 0:00:07 lr: 0.000004 grad: 0.1414 (0.1503) loss: 0.8097 (0.8078) time: 0.1134 data: 0.0299 max mem: 9377 +Train: [88] [6249/6250] eta: 0:00:00 lr: 0.000004 grad: 0.1452 (0.1503) loss: 0.8093 (0.8078) time: 0.1191 data: 0.0341 max mem: 9377 +Train: [88] Total time: 0:15:16 (0.1466 s / it) +Averaged stats: lr: 0.000004 grad: 0.1452 (0.1503) loss: 0.8093 (0.8078) +Eval (hcp-train-subset): [88] [ 0/62] eta: 0:05:51 loss: 0.8190 (0.8190) time: 5.6633 data: 5.6304 max mem: 9377 +Eval (hcp-train-subset): [88] [61/62] eta: 0:00:00 loss: 0.8080 (0.8102) time: 0.1627 data: 0.1376 max mem: 9377 +Eval (hcp-train-subset): [88] Total time: 0:00:16 (0.2603 s / it) +Averaged stats (hcp-train-subset): loss: 0.8080 (0.8102) +Eval (hcp-val): [88] [ 0/62] eta: 0:06:22 loss: 0.8237 (0.8237) time: 6.1623 data: 6.1318 max mem: 9377 +Eval (hcp-val): [88] [61/62] eta: 0:00:00 loss: 0.8288 (0.8294) time: 0.1471 data: 0.1217 max mem: 9377 +Eval (hcp-val): [88] Total time: 0:00:15 (0.2464 s / it) +Averaged stats (hcp-val): loss: 0.8288 (0.8294) +Eval (nsd-val): [88] [ 0/62] eta: 0:05:16 loss: 0.8062 (0.8062) time: 5.1101 data: 5.0383 max mem: 9377 +Eval (nsd-val): [88] [61/62] eta: 0:00:00 loss: 0.8124 (0.8134) time: 0.1580 data: 0.1325 max mem: 9377 +Eval (nsd-val): [88] Total time: 0:00:15 (0.2455 s / it) +Averaged stats (nsd-val): loss: 0.8124 (0.8134) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +Train: [89] [ 0/6250] eta: 9:05:42 lr: 0.000004 grad: 0.1454 (0.1454) loss: 0.8537 (0.8537) time: 5.2388 data: 4.9598 max mem: 9377 +Train: [89] [ 100/6250] eta: 0:23:09 lr: 0.000004 grad: 0.1646 (0.1776) loss: 0.8241 (0.8261) time: 0.1616 data: 0.0577 max mem: 9377 +Train: [89] [ 200/6250] eta: 0:19:50 lr: 0.000004 grad: 0.1413 (0.1635) loss: 0.8247 (0.8228) time: 0.1548 data: 0.0610 max mem: 9377 +Train: [89] [ 300/6250] eta: 0:18:14 lr: 0.000004 grad: 0.1433 (0.1587) loss: 0.8165 (0.8210) time: 0.1799 data: 0.0884 max mem: 9377 +Train: [89] [ 400/6250] eta: 0:17:10 lr: 0.000004 grad: 0.1455 (0.1550) loss: 0.8153 (0.8196) time: 0.1621 data: 0.0752 max mem: 9377 +Train: [89] [ 500/6250] eta: 0:16:20 lr: 0.000004 grad: 0.1310 (0.1521) loss: 0.8182 (0.8192) time: 0.1689 data: 0.0771 max mem: 9377 +Train: [89] [ 600/6250] eta: 0:15:39 lr: 0.000004 grad: 0.1377 (0.1505) loss: 0.8152 (0.8189) time: 0.1307 data: 0.0344 max mem: 9377 +Train: [89] [ 700/6250] eta: 0:15:07 lr: 0.000004 grad: 0.1396 (0.1496) loss: 0.8131 (0.8184) time: 0.1444 data: 0.0520 max mem: 9377 +Train: [89] [ 800/6250] eta: 0:14:34 lr: 0.000004 grad: 0.1422 (0.1490) loss: 0.8095 (0.8177) time: 0.1220 data: 0.0273 max mem: 9377 +Train: [89] [ 900/6250] eta: 0:14:06 lr: 0.000004 grad: 0.1407 (0.1489) loss: 0.8185 (0.8171) time: 0.1177 data: 0.0099 max mem: 9377 +Train: [89] [1000/6250] eta: 0:13:38 lr: 0.000004 grad: 0.1458 (0.1486) loss: 0.8142 (0.8169) time: 0.1419 data: 0.0550 max mem: 9377 +Train: [89] [1100/6250] eta: 0:13:17 lr: 0.000004 grad: 0.1320 (0.1482) loss: 0.8124 (0.8167) time: 0.1191 data: 0.0310 max mem: 9377 +Train: [89] [1200/6250] eta: 0:13:03 lr: 0.000004 grad: 0.1431 (0.1477) loss: 0.8106 (0.8164) time: 0.2002 data: 0.1162 max mem: 9377 +Train: [89] [1300/6250] eta: 0:12:40 lr: 0.000004 grad: 0.1464 (0.1475) loss: 0.8121 (0.8163) time: 0.1454 data: 0.0657 max mem: 9377 +Train: [89] [1400/6250] eta: 0:12:21 lr: 0.000004 grad: 0.1355 (0.1472) loss: 0.8110 (0.8160) time: 0.1550 data: 0.0673 max mem: 9377 +Train: [89] [1500/6250] eta: 0:12:04 lr: 0.000004 grad: 0.1428 (0.1472) loss: 0.8153 (0.8159) time: 0.1366 data: 0.0527 max mem: 9377 +Train: [89] [1600/6250] eta: 0:11:49 lr: 0.000004 grad: 0.1397 (0.1469) loss: 0.8169 (0.8156) time: 0.1526 data: 0.0639 max mem: 9377 +Train: [89] [1700/6250] eta: 0:11:34 lr: 0.000004 grad: 0.1401 (0.1468) loss: 0.8153 (0.8153) time: 0.1473 data: 0.0642 max mem: 9377 +Train: [89] [1800/6250] eta: 0:11:17 lr: 0.000004 grad: 0.1397 (0.1466) loss: 0.8157 (0.8151) time: 0.1409 data: 0.0518 max mem: 9377 +Train: [89] [1900/6250] eta: 0:11:00 lr: 0.000004 grad: 0.1329 (0.1466) loss: 0.8157 (0.8148) time: 0.1519 data: 0.0717 max mem: 9377 +Train: [89] [2000/6250] eta: 0:10:44 lr: 0.000004 grad: 0.1375 (0.1464) loss: 0.8162 (0.8146) time: 0.1338 data: 0.0476 max mem: 9377 +Train: [89] [2100/6250] eta: 0:10:28 lr: 0.000004 grad: 0.1406 (0.1462) loss: 0.8139 (0.8145) time: 0.1551 data: 0.0685 max mem: 9377 +Train: [89] [2200/6250] eta: 0:10:12 lr: 0.000004 grad: 0.1407 (0.1460) loss: 0.8096 (0.8143) time: 0.1393 data: 0.0552 max mem: 9377 +Train: [89] [2300/6250] eta: 0:09:56 lr: 0.000004 grad: 0.1450 (0.1460) loss: 0.8120 (0.8142) time: 0.1476 data: 0.0653 max mem: 9377 +Train: [89] [2400/6250] eta: 0:09:41 lr: 0.000004 grad: 0.1379 (0.1458) loss: 0.8077 (0.8141) time: 0.1603 data: 0.0756 max mem: 9377 +Train: [89] [2500/6250] eta: 0:09:25 lr: 0.000004 grad: 0.1452 (0.1460) loss: 0.8074 (0.8139) time: 0.1629 data: 0.0817 max mem: 9377 +Train: [89] [2600/6250] eta: 0:09:07 lr: 0.000004 grad: 0.1371 (0.1459) loss: 0.8127 (0.8138) time: 0.1305 data: 0.0437 max mem: 9377 +Train: [89] [2700/6250] eta: 0:08:51 lr: 0.000004 grad: 0.1520 (0.1459) loss: 0.8170 (0.8137) time: 0.1075 data: 0.0109 max mem: 9377 +Train: [89] [2800/6250] eta: 0:08:35 lr: 0.000004 grad: 0.1376 (0.1459) loss: 0.8076 (0.8136) time: 0.1401 data: 0.0592 max mem: 9377 +Train: [89] [2900/6250] eta: 0:08:18 lr: 0.000004 grad: 0.1407 (0.1458) loss: 0.8106 (0.8135) time: 0.1480 data: 0.0572 max mem: 9377 +Train: [89] [3000/6250] eta: 0:08:03 lr: 0.000004 grad: 0.1433 (0.1457) loss: 0.8046 (0.8132) time: 0.1388 data: 0.0569 max mem: 9377 +Train: [89] [3100/6250] eta: 0:07:47 lr: 0.000004 grad: 0.1420 (0.1457) loss: 0.8041 (0.8130) time: 0.1345 data: 0.0536 max mem: 9377 +Train: [89] [3200/6250] eta: 0:07:32 lr: 0.000004 grad: 0.1484 (0.1458) loss: 0.8099 (0.8130) time: 0.1609 data: 0.0834 max mem: 9377 +Train: [89] [3300/6250] eta: 0:07:17 lr: 0.000004 grad: 0.1415 (0.1458) loss: 0.8105 (0.8129) time: 0.1588 data: 0.0746 max mem: 9377 +Train: [89] [3400/6250] eta: 0:07:01 lr: 0.000004 grad: 0.1495 (0.1457) loss: 0.8133 (0.8128) time: 0.1226 data: 0.0369 max mem: 9377 +Train: [89] [3500/6250] eta: 0:06:46 lr: 0.000004 grad: 0.1359 (0.1459) loss: 0.8053 (0.8128) time: 0.1483 data: 0.0651 max mem: 9377 +Train: [89] [3600/6250] eta: 0:06:31 lr: 0.000004 grad: 0.1508 (0.1459) loss: 0.8085 (0.8127) time: 0.1506 data: 0.0684 max mem: 9377 +Train: [89] [3700/6250] eta: 0:06:17 lr: 0.000004 grad: 0.1452 (0.1459) loss: 0.8101 (0.8126) time: 0.1592 data: 0.0718 max mem: 9377 +Train: [89] [3800/6250] eta: 0:06:02 lr: 0.000004 grad: 0.1456 (0.1459) loss: 0.8064 (0.8125) time: 0.1688 data: 0.0839 max mem: 9377 +Train: [89] [3900/6250] eta: 0:05:48 lr: 0.000004 grad: 0.1388 (0.1458) loss: 0.8059 (0.8124) time: 0.1526 data: 0.0637 max mem: 9377 +Train: [89] [4000/6250] eta: 0:05:33 lr: 0.000004 grad: 0.1381 (0.1458) loss: 0.8125 (0.8123) time: 0.1553 data: 0.0771 max mem: 9377 +Train: [89] [4100/6250] eta: 0:05:18 lr: 0.000004 grad: 0.1435 (0.1458) loss: 0.8092 (0.8122) time: 0.1250 data: 0.0396 max mem: 9377 +Train: [89] [4200/6250] eta: 0:05:05 lr: 0.000004 grad: 0.1366 (0.1458) loss: 0.8082 (0.8121) time: 0.1966 data: 0.1102 max mem: 9377 +Train: [89] [4300/6250] eta: 0:04:51 lr: 0.000004 grad: 0.1492 (0.1458) loss: 0.7989 (0.8119) time: 0.1961 data: 0.0587 max mem: 9377 +Train: [89] [4400/6250] eta: 0:04:36 lr: 0.000004 grad: 0.1459 (0.1459) loss: 0.8092 (0.8118) time: 0.1439 data: 0.0635 max mem: 9377 +Train: [89] [4500/6250] eta: 0:04:21 lr: 0.000004 grad: 0.1539 (0.1460) loss: 0.8096 (0.8117) time: 0.1281 data: 0.0444 max mem: 9377 +Train: [89] [4600/6250] eta: 0:04:06 lr: 0.000004 grad: 0.1395 (0.1461) loss: 0.8122 (0.8116) time: 0.1310 data: 0.0476 max mem: 9377 +Train: [89] [4700/6250] eta: 0:03:51 lr: 0.000004 grad: 0.1410 (0.1461) loss: 0.8113 (0.8115) time: 0.1259 data: 0.0424 max mem: 9377 +Train: [89] [4800/6250] eta: 0:03:36 lr: 0.000004 grad: 0.1373 (0.1462) loss: 0.8135 (0.8115) time: 0.1519 data: 0.0683 max mem: 9377 +Train: [89] [4900/6250] eta: 0:03:21 lr: 0.000004 grad: 0.1370 (0.1462) loss: 0.8082 (0.8115) time: 0.1292 data: 0.0356 max mem: 9377 +Train: [89] [5000/6250] eta: 0:03:06 lr: 0.000004 grad: 0.1537 (0.1463) loss: 0.8085 (0.8114) time: 0.1333 data: 0.0489 max mem: 9377 +Train: [89] [5100/6250] eta: 0:02:50 lr: 0.000004 grad: 0.1361 (0.1464) loss: 0.8124 (0.8114) time: 0.1329 data: 0.0482 max mem: 9377 +Train: [89] [5200/6250] eta: 0:02:35 lr: 0.000003 grad: 0.1466 (0.1465) loss: 0.8068 (0.8113) time: 0.1268 data: 0.0361 max mem: 9377 +Train: [89] [5300/6250] eta: 0:02:20 lr: 0.000003 grad: 0.1373 (0.1465) loss: 0.8145 (0.8114) time: 0.1397 data: 0.0509 max mem: 9377 +Train: [89] [5400/6250] eta: 0:02:05 lr: 0.000003 grad: 0.1445 (0.1465) loss: 0.8201 (0.8114) time: 0.1390 data: 0.0535 max mem: 9377 +Train: [89] [5500/6250] eta: 0:01:50 lr: 0.000003 grad: 0.1482 (0.1465) loss: 0.8115 (0.8114) time: 0.1427 data: 0.0598 max mem: 9377 +Train: [89] [5600/6250] eta: 0:01:35 lr: 0.000003 grad: 0.1398 (0.1465) loss: 0.8052 (0.8114) time: 0.1231 data: 0.0414 max mem: 9377 +Train: [89] [5700/6250] eta: 0:01:20 lr: 0.000003 grad: 0.1406 (0.1465) loss: 0.8070 (0.8113) time: 0.1492 data: 0.0699 max mem: 9377 +Train: [89] [5800/6250] eta: 0:01:06 lr: 0.000003 grad: 0.1427 (0.1466) loss: 0.8152 (0.8113) time: 0.1339 data: 0.0518 max mem: 9377 +Train: [89] [5900/6250] eta: 0:00:51 lr: 0.000003 grad: 0.1464 (0.1466) loss: 0.8088 (0.8114) time: 0.1162 data: 0.0291 max mem: 9377 +Train: [89] [6000/6250] eta: 0:00:36 lr: 0.000003 grad: 0.1433 (0.1465) loss: 0.8103 (0.8114) time: 0.1428 data: 0.0609 max mem: 9377 +Train: [89] [6100/6250] eta: 0:00:21 lr: 0.000003 grad: 0.1413 (0.1465) loss: 0.8136 (0.8115) time: 0.1505 data: 0.0607 max mem: 9377 +Train: [89] [6200/6250] eta: 0:00:07 lr: 0.000003 grad: 0.1330 (0.1464) loss: 0.8133 (0.8115) time: 0.1657 data: 0.0747 max mem: 9377 +Train: [89] [6249/6250] eta: 0:00:00 lr: 0.000003 grad: 0.1339 (0.1464) loss: 0.8141 (0.8116) time: 0.1342 data: 0.0500 max mem: 9377 +Train: [89] Total time: 0:15:21 (0.1474 s / it) +Averaged stats: lr: 0.000003 grad: 0.1339 (0.1464) loss: 0.8141 (0.8116) +Eval (hcp-train-subset): [89] [ 0/62] eta: 0:05:39 loss: 0.8181 (0.8181) time: 5.4777 data: 5.4454 max mem: 9377 +Eval (hcp-train-subset): [89] [61/62] eta: 0:00:00 loss: 0.8086 (0.8096) time: 0.1119 data: 0.0870 max mem: 9377 +Eval (hcp-train-subset): [89] Total time: 0:00:16 (0.2593 s / it) +Averaged stats (hcp-train-subset): loss: 0.8086 (0.8096) +Making plots (hcp-train-subset): example=23 +Eval (hcp-val): [89] [ 0/62] eta: 0:05:14 loss: 0.8242 (0.8242) time: 5.0706 data: 5.0318 max mem: 9377 +Eval (hcp-val): [89] [61/62] eta: 0:00:00 loss: 0.8278 (0.8294) time: 0.1456 data: 0.1204 max mem: 9377 +Eval (hcp-val): [89] Total time: 0:00:13 (0.2237 s / it) +Averaged stats (hcp-val): loss: 0.8278 (0.8294) +Making plots (hcp-val): example=5 +Eval (nsd-val): [89] [ 0/62] eta: 0:04:34 loss: 0.8014 (0.8014) time: 4.4259 data: 4.3114 max mem: 9377 +Eval (nsd-val): [89] [61/62] eta: 0:00:00 loss: 0.8108 (0.8137) time: 0.1338 data: 0.1083 max mem: 9377 +Eval (nsd-val): [89] Total time: 0:00:14 (0.2275 s / it) +Averaged stats (nsd-val): loss: 0.8108 (0.8137) +Making plots (nsd-val): example=1 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-00089.pth +Train: [90] [ 0/6250] eta: 11:07:02 lr: 0.000003 grad: 0.1203 (0.1203) loss: 0.8074 (0.8074) time: 6.4035 data: 6.2694 max mem: 9377 +Train: [90] [ 100/6250] eta: 0:23:21 lr: 0.000003 grad: 0.1576 (0.1784) loss: 0.8169 (0.8164) time: 0.1833 data: 0.0922 max mem: 9377 +Train: [90] [ 200/6250] eta: 0:20:04 lr: 0.000003 grad: 0.1704 (0.1735) loss: 0.8155 (0.8143) time: 0.1583 data: 0.0520 max mem: 9377 +Train: [90] [ 300/6250] eta: 0:18:52 lr: 0.000003 grad: 0.1631 (0.1693) loss: 0.8147 (0.8142) time: 0.1608 data: 0.0573 max mem: 9377 +Train: [90] [ 400/6250] eta: 0:18:19 lr: 0.000003 grad: 0.1548 (0.1668) loss: 0.8067 (0.8128) time: 0.2019 data: 0.1024 max mem: 9377 +Train: [90] [ 500/6250] eta: 0:17:16 lr: 0.000003 grad: 0.1593 (0.1662) loss: 0.8074 (0.8114) time: 0.1551 data: 0.0568 max mem: 9377 +Train: [90] [ 600/6250] eta: 0:16:22 lr: 0.000003 grad: 0.1550 (0.1653) loss: 0.8023 (0.8103) time: 0.1327 data: 0.0333 max mem: 9377 +Train: [90] [ 700/6250] eta: 0:15:45 lr: 0.000003 grad: 0.1599 (0.1646) loss: 0.8015 (0.8094) time: 0.1517 data: 0.0602 max mem: 9377 +Train: [90] [ 800/6250] eta: 0:15:18 lr: 0.000003 grad: 0.1540 (0.1639) loss: 0.8075 (0.8088) time: 0.1651 data: 0.0748 max mem: 9377 +Train: [90] [ 900/6250] eta: 0:14:49 lr: 0.000003 grad: 0.1528 (0.1630) loss: 0.8143 (0.8086) time: 0.1419 data: 0.0508 max mem: 9377 +Train: [90] [1000/6250] eta: 0:14:29 lr: 0.000003 grad: 0.1574 (0.1624) loss: 0.8125 (0.8085) time: 0.1595 data: 0.0762 max mem: 9377 +Train: [90] [1100/6250] eta: 0:14:11 lr: 0.000003 grad: 0.1589 (0.1614) loss: 0.8094 (0.8084) time: 0.1592 data: 0.0729 max mem: 9377 +Train: [90] [1200/6250] eta: 0:13:51 lr: 0.000003 grad: 0.1606 (0.1608) loss: 0.8068 (0.8083) time: 0.1617 data: 0.0819 max mem: 9377 +Train: [90] [1300/6250] eta: 0:13:32 lr: 0.000003 grad: 0.1562 (0.1603) loss: 0.8082 (0.8081) time: 0.1695 data: 0.0871 max mem: 9377 +Train: [90] [1400/6250] eta: 0:13:13 lr: 0.000003 grad: 0.1453 (0.1598) loss: 0.8060 (0.8079) time: 0.1374 data: 0.0580 max mem: 9377 +Train: [90] [1500/6250] eta: 0:12:51 lr: 0.000003 grad: 0.1507 (0.1594) loss: 0.8043 (0.8076) time: 0.1293 data: 0.0435 max mem: 9377 +Train: [90] [1600/6250] eta: 0:12:33 lr: 0.000003 grad: 0.1513 (0.1588) loss: 0.8026 (0.8074) time: 0.1439 data: 0.0586 max mem: 9377 +Train: [90] [1700/6250] eta: 0:12:14 lr: 0.000003 grad: 0.1402 (0.1582) loss: 0.8026 (0.8073) time: 0.1414 data: 0.0602 max mem: 9377 +Train: [90] [1800/6250] eta: 0:11:57 lr: 0.000003 grad: 0.1477 (0.1577) loss: 0.7973 (0.8072) time: 0.1599 data: 0.0782 max mem: 9377 +Train: [90] [1900/6250] eta: 0:11:38 lr: 0.000003 grad: 0.1471 (0.1574) loss: 0.7970 (0.8070) time: 0.1463 data: 0.0579 max mem: 9377 +Train: [90] [2000/6250] eta: 0:11:18 lr: 0.000003 grad: 0.1534 (0.1572) loss: 0.8119 (0.8068) time: 0.1535 data: 0.0700 max mem: 9377 +Train: [90] [2100/6250] eta: 0:10:59 lr: 0.000003 grad: 0.1446 (0.1569) loss: 0.8011 (0.8065) time: 0.1408 data: 0.0594 max mem: 9377 +Train: [90] [2200/6250] eta: 0:10:39 lr: 0.000003 grad: 0.1508 (0.1567) loss: 0.7987 (0.8063) time: 0.1447 data: 0.0669 max mem: 9377 +Train: [90] [2300/6250] eta: 0:10:22 lr: 0.000003 grad: 0.1448 (0.1565) loss: 0.8105 (0.8061) time: 0.1535 data: 0.0711 max mem: 9377 +Train: [90] [2400/6250] eta: 0:10:03 lr: 0.000003 grad: 0.1475 (0.1565) loss: 0.8040 (0.8059) time: 0.1335 data: 0.0515 max mem: 9377 +Train: [90] [2500/6250] eta: 0:09:45 lr: 0.000003 grad: 0.1473 (0.1562) loss: 0.8004 (0.8058) time: 0.1531 data: 0.0670 max mem: 9377 +Train: [90] [2600/6250] eta: 0:09:28 lr: 0.000003 grad: 0.1434 (0.1559) loss: 0.8022 (0.8058) time: 0.1498 data: 0.0694 max mem: 9377 +Train: [90] [2700/6250] eta: 0:09:10 lr: 0.000003 grad: 0.1338 (0.1554) loss: 0.8108 (0.8059) time: 0.1457 data: 0.0653 max mem: 9377 +Train: [90] [2800/6250] eta: 0:08:53 lr: 0.000003 grad: 0.1408 (0.1550) loss: 0.8084 (0.8060) time: 0.1441 data: 0.0624 max mem: 9377 +Train: [90] [2900/6250] eta: 0:08:36 lr: 0.000003 grad: 0.1562 (0.1548) loss: 0.7979 (0.8060) time: 0.1317 data: 0.0534 max mem: 9377 +Train: [90] [3000/6250] eta: 0:08:20 lr: 0.000003 grad: 0.1410 (0.1546) loss: 0.8071 (0.8060) time: 0.1413 data: 0.0566 max mem: 9377 +Train: [90] [3100/6250] eta: 0:08:03 lr: 0.000003 grad: 0.1466 (0.1544) loss: 0.8042 (0.8060) time: 0.1290 data: 0.0457 max mem: 9377 +Train: [90] [3200/6250] eta: 0:07:46 lr: 0.000003 grad: 0.1396 (0.1542) loss: 0.8066 (0.8059) time: 0.1358 data: 0.0430 max mem: 9377 +Train: [90] [3300/6250] eta: 0:07:30 lr: 0.000003 grad: 0.1496 (0.1540) loss: 0.7958 (0.8059) time: 0.1404 data: 0.0530 max mem: 9377 +Train: [90] [3400/6250] eta: 0:07:13 lr: 0.000003 grad: 0.1440 (0.1538) loss: 0.7994 (0.8058) time: 0.1363 data: 0.0564 max mem: 9377 +Train: [90] [3500/6250] eta: 0:06:57 lr: 0.000003 grad: 0.1495 (0.1538) loss: 0.8129 (0.8058) time: 0.1456 data: 0.0616 max mem: 9377 +Train: [90] [3600/6250] eta: 0:06:42 lr: 0.000003 grad: 0.1474 (0.1536) loss: 0.8028 (0.8058) time: 0.1419 data: 0.0608 max mem: 9377 +Train: [90] [3700/6250] eta: 0:06:26 lr: 0.000003 grad: 0.1425 (0.1535) loss: 0.8096 (0.8058) time: 0.1264 data: 0.0468 max mem: 9377 +Train: [90] [3800/6250] eta: 0:06:11 lr: 0.000003 grad: 0.1444 (0.1534) loss: 0.8090 (0.8059) time: 0.1619 data: 0.0808 max mem: 9377 +Train: [90] [3900/6250] eta: 0:05:57 lr: 0.000003 grad: 0.1420 (0.1532) loss: 0.8096 (0.8059) time: 0.2028 data: 0.1133 max mem: 9377 +Train: [90] [4000/6250] eta: 0:05:42 lr: 0.000003 grad: 0.1349 (0.1530) loss: 0.8091 (0.8059) time: 0.1603 data: 0.0727 max mem: 9377 +Train: [90] [4100/6250] eta: 0:05:27 lr: 0.000003 grad: 0.1482 (0.1530) loss: 0.8045 (0.8059) time: 0.1416 data: 0.0509 max mem: 9377 +Train: [90] [4200/6250] eta: 0:05:11 lr: 0.000003 grad: 0.1406 (0.1528) loss: 0.8009 (0.8060) time: 0.1632 data: 0.0876 max mem: 9377 +Train: [90] [4300/6250] eta: 0:04:56 lr: 0.000003 grad: 0.1417 (0.1526) loss: 0.8100 (0.8060) time: 0.1293 data: 0.0427 max mem: 9377 +Train: [90] [4400/6250] eta: 0:04:41 lr: 0.000003 grad: 0.1428 (0.1525) loss: 0.8069 (0.8061) time: 0.1808 data: 0.0914 max mem: 9377 +Train: [90] [4500/6250] eta: 0:04:26 lr: 0.000003 grad: 0.1525 (0.1525) loss: 0.8043 (0.8061) time: 0.1431 data: 0.0581 max mem: 9377 +Train: [90] [4600/6250] eta: 0:04:11 lr: 0.000003 grad: 0.1519 (0.1525) loss: 0.8039 (0.8061) time: 0.1624 data: 0.0727 max mem: 9377 +Train: [90] [4700/6250] eta: 0:03:56 lr: 0.000003 grad: 0.1574 (0.1525) loss: 0.8007 (0.8061) time: 0.1314 data: 0.0462 max mem: 9377 +Train: [90] [4800/6250] eta: 0:03:40 lr: 0.000003 grad: 0.1519 (0.1526) loss: 0.7964 (0.8060) time: 0.1218 data: 0.0287 max mem: 9377 +Train: [90] [4900/6250] eta: 0:03:24 lr: 0.000003 grad: 0.1537 (0.1525) loss: 0.8086 (0.8059) time: 0.1202 data: 0.0356 max mem: 9377 +Train: [90] [5000/6250] eta: 0:03:10 lr: 0.000003 grad: 0.1432 (0.1525) loss: 0.8036 (0.8059) time: 0.1674 data: 0.0826 max mem: 9377 +Train: [90] [5100/6250] eta: 0:02:55 lr: 0.000003 grad: 0.1451 (0.1524) loss: 0.8143 (0.8059) time: 0.1720 data: 0.0954 max mem: 9377 +Train: [90] [5200/6250] eta: 0:02:40 lr: 0.000003 grad: 0.1502 (0.1524) loss: 0.8078 (0.8059) time: 0.2067 data: 0.1315 max mem: 9377 +Train: [90] [5300/6250] eta: 0:02:25 lr: 0.000003 grad: 0.1465 (0.1524) loss: 0.8091 (0.8059) time: 0.1730 data: 0.0908 max mem: 9377 +Train: [90] [5400/6250] eta: 0:02:10 lr: 0.000003 grad: 0.1390 (0.1524) loss: 0.8034 (0.8059) time: 0.1559 data: 0.0771 max mem: 9377 +Train: [90] [5500/6250] eta: 0:01:55 lr: 0.000003 grad: 0.1509 (0.1523) loss: 0.8033 (0.8059) time: 0.1550 data: 0.0676 max mem: 9377 +Train: [90] [5600/6250] eta: 0:01:39 lr: 0.000003 grad: 0.1484 (0.1524) loss: 0.8087 (0.8058) time: 0.1437 data: 0.0638 max mem: 9377 +Train: [90] [5700/6250] eta: 0:01:24 lr: 0.000003 grad: 0.1503 (0.1525) loss: 0.8106 (0.8058) time: 0.1510 data: 0.0663 max mem: 9377 +Train: [90] [5800/6250] eta: 0:01:08 lr: 0.000003 grad: 0.1517 (0.1525) loss: 0.8002 (0.8058) time: 0.1288 data: 0.0461 max mem: 9377 +Train: [90] [5900/6250] eta: 0:00:53 lr: 0.000003 grad: 0.1462 (0.1525) loss: 0.8075 (0.8058) time: 0.1971 data: 0.1223 max mem: 9377 +Train: [90] [6000/6250] eta: 0:00:38 lr: 0.000003 grad: 0.1401 (0.1525) loss: 0.8117 (0.8059) time: 0.1517 data: 0.0683 max mem: 9377 +Train: [90] [6100/6250] eta: 0:00:22 lr: 0.000003 grad: 0.1476 (0.1525) loss: 0.8031 (0.8059) time: 0.1795 data: 0.1026 max mem: 9377 +Train: [90] [6200/6250] eta: 0:00:07 lr: 0.000003 grad: 0.1485 (0.1525) loss: 0.8046 (0.8059) time: 0.1529 data: 0.0666 max mem: 9377 +Train: [90] [6249/6250] eta: 0:00:00 lr: 0.000003 grad: 0.1407 (0.1524) loss: 0.8124 (0.8059) time: 0.1515 data: 0.0685 max mem: 9377 +Train: [90] Total time: 0:16:01 (0.1539 s / it) +Averaged stats: lr: 0.000003 grad: 0.1407 (0.1524) loss: 0.8124 (0.8059) +Eval (hcp-train-subset): [90] [ 0/62] eta: 0:04:52 loss: 0.8193 (0.8193) time: 4.7196 data: 4.6863 max mem: 9377 +Eval (hcp-train-subset): [90] [61/62] eta: 0:00:00 loss: 0.8085 (0.8087) time: 0.1385 data: 0.1134 max mem: 9377 +Eval (hcp-train-subset): [90] Total time: 0:00:13 (0.2256 s / it) +Averaged stats (hcp-train-subset): loss: 0.8085 (0.8087) +Eval (hcp-val): [90] [ 0/62] eta: 0:05:42 loss: 0.8250 (0.8250) time: 5.5258 data: 5.4952 max mem: 9377 +Eval (hcp-val): [90] [61/62] eta: 0:00:00 loss: 0.8299 (0.8299) time: 0.1123 data: 0.0855 max mem: 9377 +Eval (hcp-val): [90] Total time: 0:00:13 (0.2195 s / it) +Averaged stats (hcp-val): loss: 0.8299 (0.8299) +Eval (nsd-val): [90] [ 0/62] eta: 0:06:20 loss: 0.8032 (0.8032) time: 6.1358 data: 6.1056 max mem: 9377 +Eval (nsd-val): [90] [61/62] eta: 0:00:00 loss: 0.8123 (0.8128) time: 0.1343 data: 0.1087 max mem: 9377 +Eval (nsd-val): [90] Total time: 0:00:13 (0.2250 s / it) +Averaged stats (nsd-val): loss: 0.8123 (0.8128) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +Train: [91] [ 0/6250] eta: 11:40:43 lr: 0.000003 grad: 0.1502 (0.1502) loss: 0.7924 (0.7924) time: 6.7270 data: 6.6337 max mem: 9377 +Train: [91] [ 100/6250] eta: 0:20:23 lr: 0.000003 grad: 0.1642 (0.1708) loss: 0.8245 (0.8294) time: 0.1371 data: 0.0338 max mem: 9377 +Train: [91] [ 200/6250] eta: 0:17:37 lr: 0.000003 grad: 0.1721 (0.1753) loss: 0.8216 (0.8229) time: 0.1659 data: 0.0717 max mem: 9377 +Train: [91] [ 300/6250] eta: 0:16:09 lr: 0.000003 grad: 0.1653 (0.1753) loss: 0.8077 (0.8183) time: 0.1411 data: 0.0481 max mem: 9377 +Train: [91] [ 400/6250] eta: 0:15:31 lr: 0.000003 grad: 0.1610 (0.1736) loss: 0.8026 (0.8152) time: 0.1404 data: 0.0538 max mem: 9377 +Train: [91] [ 500/6250] eta: 0:14:53 lr: 0.000003 grad: 0.1573 (0.1715) loss: 0.8067 (0.8131) time: 0.1398 data: 0.0533 max mem: 9377 +Train: [91] [ 600/6250] eta: 0:14:21 lr: 0.000003 grad: 0.1362 (0.1686) loss: 0.8092 (0.8122) time: 0.1443 data: 0.0384 max mem: 9377 +Train: [91] [ 700/6250] eta: 0:13:59 lr: 0.000003 grad: 0.1430 (0.1667) loss: 0.8146 (0.8115) time: 0.1576 data: 0.0663 max mem: 9377 +Train: [91] [ 800/6250] eta: 0:13:32 lr: 0.000003 grad: 0.1481 (0.1644) loss: 0.8072 (0.8111) time: 0.1350 data: 0.0393 max mem: 9377 +Train: [91] [ 900/6250] eta: 0:13:15 lr: 0.000003 grad: 0.1434 (0.1629) loss: 0.8088 (0.8107) time: 0.1514 data: 0.0686 max mem: 9377 +Train: [91] [1000/6250] eta: 0:12:56 lr: 0.000003 grad: 0.1483 (0.1618) loss: 0.8092 (0.8104) time: 0.1347 data: 0.0468 max mem: 9377 +Train: [91] [1100/6250] eta: 0:12:37 lr: 0.000003 grad: 0.1530 (0.1611) loss: 0.8096 (0.8102) time: 0.1129 data: 0.0255 max mem: 9377 +Train: [91] [1200/6250] eta: 0:12:17 lr: 0.000003 grad: 0.1370 (0.1603) loss: 0.8133 (0.8101) time: 0.1210 data: 0.0363 max mem: 9377 +Train: [91] [1300/6250] eta: 0:12:01 lr: 0.000003 grad: 0.1433 (0.1592) loss: 0.8056 (0.8102) time: 0.1746 data: 0.0873 max mem: 9377 +Train: [91] [1400/6250] eta: 0:11:43 lr: 0.000003 grad: 0.1462 (0.1585) loss: 0.8034 (0.8100) time: 0.1349 data: 0.0491 max mem: 9377 +Train: [91] [1500/6250] eta: 0:11:25 lr: 0.000003 grad: 0.1483 (0.1581) loss: 0.8056 (0.8100) time: 0.1496 data: 0.0690 max mem: 9377 +Train: [91] [1600/6250] eta: 0:11:07 lr: 0.000003 grad: 0.1534 (0.1579) loss: 0.8046 (0.8097) time: 0.1405 data: 0.0539 max mem: 9377 +Train: [91] [1700/6250] eta: 0:10:50 lr: 0.000003 grad: 0.1499 (0.1575) loss: 0.8082 (0.8095) time: 0.1213 data: 0.0328 max mem: 9377 +Train: [91] [1800/6250] eta: 0:10:35 lr: 0.000003 grad: 0.1446 (0.1572) loss: 0.8123 (0.8094) time: 0.1229 data: 0.0378 max mem: 9377 +Train: [91] [1900/6250] eta: 0:10:20 lr: 0.000003 grad: 0.1552 (0.1569) loss: 0.8088 (0.8094) time: 0.1480 data: 0.0664 max mem: 9377 +Train: [91] [2000/6250] eta: 0:10:06 lr: 0.000003 grad: 0.1521 (0.1566) loss: 0.8127 (0.8093) time: 0.1324 data: 0.0499 max mem: 9377 +Train: [91] [2100/6250] eta: 0:09:52 lr: 0.000003 grad: 0.1395 (0.1562) loss: 0.8088 (0.8092) time: 0.1677 data: 0.0889 max mem: 9377 +Train: [91] [2200/6250] eta: 0:09:37 lr: 0.000003 grad: 0.1375 (0.1558) loss: 0.8094 (0.8094) time: 0.1441 data: 0.0651 max mem: 9377 +Train: [91] [2300/6250] eta: 0:09:23 lr: 0.000003 grad: 0.1356 (0.1552) loss: 0.8154 (0.8095) time: 0.1157 data: 0.0289 max mem: 9377 +Train: [91] [2400/6250] eta: 0:09:09 lr: 0.000003 grad: 0.1362 (0.1546) loss: 0.8111 (0.8096) time: 0.1370 data: 0.0559 max mem: 9377 +Train: [91] [2500/6250] eta: 0:08:55 lr: 0.000003 grad: 0.1445 (0.1542) loss: 0.8084 (0.8098) time: 0.1527 data: 0.0658 max mem: 9377 +Train: [91] [2600/6250] eta: 0:08:40 lr: 0.000003 grad: 0.1374 (0.1539) loss: 0.8156 (0.8099) time: 0.1396 data: 0.0567 max mem: 9377 +Train: [91] [2700/6250] eta: 0:08:26 lr: 0.000002 grad: 0.1271 (0.1535) loss: 0.8161 (0.8101) time: 0.1442 data: 0.0645 max mem: 9377 +Train: [91] [2800/6250] eta: 0:08:11 lr: 0.000002 grad: 0.1401 (0.1532) loss: 0.8092 (0.8102) time: 0.1464 data: 0.0602 max mem: 9377 +Train: [91] [2900/6250] eta: 0:07:57 lr: 0.000002 grad: 0.1467 (0.1529) loss: 0.8086 (0.8103) time: 0.1499 data: 0.0744 max mem: 9377 +Train: [91] [3000/6250] eta: 0:07:43 lr: 0.000002 grad: 0.1415 (0.1526) loss: 0.8099 (0.8104) time: 0.1839 data: 0.1053 max mem: 9377 +Train: [91] [3100/6250] eta: 0:07:29 lr: 0.000002 grad: 0.1500 (0.1525) loss: 0.8079 (0.8105) time: 0.1500 data: 0.0659 max mem: 9377 +Train: [91] [3200/6250] eta: 0:07:15 lr: 0.000002 grad: 0.1382 (0.1525) loss: 0.8141 (0.8104) time: 0.1466 data: 0.0632 max mem: 9377 +Train: [91] [3300/6250] eta: 0:07:00 lr: 0.000002 grad: 0.1444 (0.1525) loss: 0.8072 (0.8104) time: 0.1282 data: 0.0432 max mem: 9377 +Train: [91] [3400/6250] eta: 0:06:46 lr: 0.000002 grad: 0.1551 (0.1526) loss: 0.8037 (0.8104) time: 0.1570 data: 0.0746 max mem: 9377 +Train: [91] [3500/6250] eta: 0:06:31 lr: 0.000002 grad: 0.1447 (0.1527) loss: 0.8120 (0.8103) time: 0.1487 data: 0.0663 max mem: 9377 +Train: [91] [3600/6250] eta: 0:06:17 lr: 0.000002 grad: 0.1454 (0.1526) loss: 0.8078 (0.8103) time: 0.1435 data: 0.0596 max mem: 9377 +Train: [91] [3700/6250] eta: 0:06:03 lr: 0.000002 grad: 0.1457 (0.1526) loss: 0.8106 (0.8103) time: 0.1756 data: 0.0973 max mem: 9377 +Train: [91] [3800/6250] eta: 0:05:49 lr: 0.000002 grad: 0.1574 (0.1526) loss: 0.8051 (0.8103) time: 0.1661 data: 0.0629 max mem: 9377 +Train: [91] [3900/6250] eta: 0:05:35 lr: 0.000002 grad: 0.1465 (0.1525) loss: 0.8104 (0.8103) time: 0.1414 data: 0.0637 max mem: 9377 +Train: [91] [4000/6250] eta: 0:05:22 lr: 0.000002 grad: 0.1459 (0.1524) loss: 0.8087 (0.8102) time: 0.1995 data: 0.1214 max mem: 9377 +Train: [91] [4100/6250] eta: 0:05:08 lr: 0.000002 grad: 0.1496 (0.1524) loss: 0.8111 (0.8101) time: 0.1629 data: 0.0804 max mem: 9377 +Train: [91] [4200/6250] eta: 0:04:53 lr: 0.000002 grad: 0.1494 (0.1523) loss: 0.8074 (0.8100) time: 0.1625 data: 0.0749 max mem: 9377 +Train: [91] [4300/6250] eta: 0:04:40 lr: 0.000002 grad: 0.1451 (0.1523) loss: 0.8091 (0.8100) time: 0.1493 data: 0.0682 max mem: 9377 +Train: [91] [4400/6250] eta: 0:04:25 lr: 0.000002 grad: 0.1494 (0.1522) loss: 0.8012 (0.8098) time: 0.1406 data: 0.0558 max mem: 9377 +Train: [91] [4500/6250] eta: 0:04:11 lr: 0.000002 grad: 0.1475 (0.1521) loss: 0.8014 (0.8097) time: 0.1698 data: 0.0900 max mem: 9377 +Train: [91] [4600/6250] eta: 0:03:57 lr: 0.000002 grad: 0.1431 (0.1521) loss: 0.8032 (0.8096) time: 0.1415 data: 0.0598 max mem: 9377 +Train: [91] [4700/6250] eta: 0:03:42 lr: 0.000002 grad: 0.1482 (0.1519) loss: 0.8035 (0.8095) time: 0.1368 data: 0.0559 max mem: 9377 +Train: [91] [4800/6250] eta: 0:03:27 lr: 0.000002 grad: 0.1426 (0.1518) loss: 0.8067 (0.8095) time: 0.1325 data: 0.0465 max mem: 9377 +Train: [91] [4900/6250] eta: 0:03:13 lr: 0.000002 grad: 0.1437 (0.1518) loss: 0.8015 (0.8094) time: 0.1296 data: 0.0353 max mem: 9377 +Train: [91] [5000/6250] eta: 0:02:58 lr: 0.000002 grad: 0.1474 (0.1517) loss: 0.8045 (0.8094) time: 0.1417 data: 0.0589 max mem: 9377 +Train: [91] [5100/6250] eta: 0:02:44 lr: 0.000002 grad: 0.1438 (0.1516) loss: 0.8085 (0.8093) time: 0.1335 data: 0.0380 max mem: 9377 +Train: [91] [5200/6250] eta: 0:02:30 lr: 0.000002 grad: 0.1459 (0.1514) loss: 0.8042 (0.8093) time: 0.1277 data: 0.0425 max mem: 9377 +Train: [91] [5300/6250] eta: 0:02:15 lr: 0.000002 grad: 0.1479 (0.1514) loss: 0.8047 (0.8092) time: 0.1295 data: 0.0443 max mem: 9377 +Train: [91] [5400/6250] eta: 0:02:01 lr: 0.000002 grad: 0.1511 (0.1513) loss: 0.8054 (0.8091) time: 0.1356 data: 0.0510 max mem: 9377 +Train: [91] [5500/6250] eta: 0:01:46 lr: 0.000002 grad: 0.1529 (0.1514) loss: 0.8025 (0.8091) time: 0.1531 data: 0.0702 max mem: 9377 +Train: [91] [5600/6250] eta: 0:01:32 lr: 0.000002 grad: 0.1488 (0.1515) loss: 0.8045 (0.8090) time: 0.0910 data: 0.0073 max mem: 9377 +Train: [91] [5700/6250] eta: 0:01:18 lr: 0.000002 grad: 0.1578 (0.1515) loss: 0.8033 (0.8090) time: 0.1352 data: 0.0552 max mem: 9377 +Train: [91] [5800/6250] eta: 0:01:04 lr: 0.000002 grad: 0.1428 (0.1514) loss: 0.8030 (0.8089) time: 0.1089 data: 0.0211 max mem: 9377 +Train: [91] [5900/6250] eta: 0:00:49 lr: 0.000002 grad: 0.1405 (0.1515) loss: 0.8060 (0.8088) time: 0.1497 data: 0.0691 max mem: 9377 +Train: [91] [6000/6250] eta: 0:00:35 lr: 0.000002 grad: 0.1471 (0.1514) loss: 0.8062 (0.8087) time: 0.1550 data: 0.0735 max mem: 9377 +Train: [91] [6100/6250] eta: 0:00:21 lr: 0.000002 grad: 0.1476 (0.1514) loss: 0.7995 (0.8087) time: 0.1816 data: 0.1025 max mem: 9377 +Train: [91] [6200/6250] eta: 0:00:07 lr: 0.000002 grad: 0.1456 (0.1514) loss: 0.8081 (0.8086) time: 0.1620 data: 0.0770 max mem: 9377 +Train: [91] [6249/6250] eta: 0:00:00 lr: 0.000002 grad: 0.1474 (0.1514) loss: 0.8056 (0.8085) time: 0.1363 data: 0.0595 max mem: 9377 +Train: [91] Total time: 0:14:57 (0.1436 s / it) +Averaged stats: lr: 0.000002 grad: 0.1474 (0.1514) loss: 0.8056 (0.8085) +Eval (hcp-train-subset): [91] [ 0/62] eta: 0:04:04 loss: 0.8169 (0.8169) time: 3.9370 data: 3.8655 max mem: 9377 +Eval (hcp-train-subset): [91] [61/62] eta: 0:00:00 loss: 0.8061 (0.8081) time: 0.1317 data: 0.1066 max mem: 9377 +Eval (hcp-train-subset): [91] Total time: 0:00:13 (0.2246 s / it) +Averaged stats (hcp-train-subset): loss: 0.8061 (0.8081) +Eval (hcp-val): [91] [ 0/62] eta: 0:05:14 loss: 0.8243 (0.8243) time: 5.0707 data: 5.0400 max mem: 9377 +Eval (hcp-val): [91] [61/62] eta: 0:00:00 loss: 0.8266 (0.8291) time: 0.1228 data: 0.0975 max mem: 9377 +Eval (hcp-val): [91] Total time: 0:00:13 (0.2209 s / it) +Averaged stats (hcp-val): loss: 0.8266 (0.8291) +Eval (nsd-val): [91] [ 0/62] eta: 0:04:16 loss: 0.8022 (0.8022) time: 4.1403 data: 4.0694 max mem: 9377 +Eval (nsd-val): [91] [61/62] eta: 0:00:00 loss: 0.8122 (0.8132) time: 0.0931 data: 0.0679 max mem: 9377 +Eval (nsd-val): [91] Total time: 0:00:13 (0.2105 s / it) +Averaged stats (nsd-val): loss: 0.8122 (0.8132) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +Train: [92] [ 0/6250] eta: 9:30:20 lr: 0.000002 grad: 0.1038 (0.1038) loss: 0.8148 (0.8148) time: 5.4753 data: 5.2864 max mem: 9377 +Train: [92] [ 100/6250] eta: 0:20:40 lr: 0.000002 grad: 0.1412 (0.1664) loss: 0.8304 (0.8289) time: 0.1696 data: 0.0695 max mem: 9377 +Train: [92] [ 200/6250] eta: 0:18:06 lr: 0.000002 grad: 0.1706 (0.1605) loss: 0.8180 (0.8236) time: 0.1651 data: 0.0622 max mem: 9377 +Train: [92] [ 300/6250] eta: 0:16:36 lr: 0.000002 grad: 0.1510 (0.1596) loss: 0.8146 (0.8200) time: 0.1551 data: 0.0584 max mem: 9377 +Train: [92] [ 400/6250] eta: 0:15:26 lr: 0.000002 grad: 0.1603 (0.1602) loss: 0.8052 (0.8175) time: 0.1311 data: 0.0358 max mem: 9377 +Train: [92] [ 500/6250] eta: 0:14:45 lr: 0.000002 grad: 0.1491 (0.1603) loss: 0.8146 (0.8158) time: 0.1368 data: 0.0477 max mem: 9377 +Train: [92] [ 600/6250] eta: 0:14:15 lr: 0.000002 grad: 0.1615 (0.1600) loss: 0.8105 (0.8151) time: 0.1433 data: 0.0470 max mem: 9377 +Train: [92] [ 700/6250] eta: 0:14:07 lr: 0.000002 grad: 0.1564 (0.1593) loss: 0.8089 (0.8146) time: 0.1552 data: 0.0699 max mem: 9377 +Train: [92] [ 800/6250] eta: 0:14:00 lr: 0.000002 grad: 0.1515 (0.1585) loss: 0.8098 (0.8142) time: 0.1613 data: 0.0749 max mem: 9377 +Train: [92] [ 900/6250] eta: 0:13:55 lr: 0.000002 grad: 0.1471 (0.1577) loss: 0.8153 (0.8140) time: 0.1739 data: 0.0921 max mem: 9377 +Train: [92] [1000/6250] eta: 0:13:42 lr: 0.000002 grad: 0.1493 (0.1569) loss: 0.8142 (0.8139) time: 0.1629 data: 0.0799 max mem: 9377 +Train: [92] [1100/6250] eta: 0:13:23 lr: 0.000002 grad: 0.1469 (0.1560) loss: 0.8160 (0.8138) time: 0.1540 data: 0.0664 max mem: 9377 +Train: [92] [1200/6250] eta: 0:13:01 lr: 0.000002 grad: 0.1414 (0.1554) loss: 0.8148 (0.8137) time: 0.1368 data: 0.0480 max mem: 9377 +Train: [92] [1300/6250] eta: 0:12:43 lr: 0.000002 grad: 0.1489 (0.1550) loss: 0.8109 (0.8135) time: 0.1756 data: 0.0944 max mem: 9377 +Train: [92] [1400/6250] eta: 0:12:26 lr: 0.000002 grad: 0.1444 (0.1550) loss: 0.8025 (0.8130) time: 0.1551 data: 0.0748 max mem: 9377 +Train: [92] [1500/6250] eta: 0:12:09 lr: 0.000002 grad: 0.1550 (0.1548) loss: 0.8064 (0.8126) time: 0.1502 data: 0.0698 max mem: 9377 +Train: [92] [1600/6250] eta: 0:11:55 lr: 0.000002 grad: 0.1484 (0.1547) loss: 0.8068 (0.8123) time: 0.1668 data: 0.0818 max mem: 9377 +Train: [92] [1700/6250] eta: 0:11:38 lr: 0.000002 grad: 0.1502 (0.1548) loss: 0.8002 (0.8120) time: 0.1197 data: 0.0368 max mem: 9377 +Train: [92] [1800/6250] eta: 0:11:21 lr: 0.000002 grad: 0.1526 (0.1549) loss: 0.8029 (0.8115) time: 0.1221 data: 0.0341 max mem: 9377 +Train: [92] [1900/6250] eta: 0:11:05 lr: 0.000002 grad: 0.1460 (0.1548) loss: 0.8072 (0.8113) time: 0.1376 data: 0.0542 max mem: 9377 +Train: [92] [2000/6250] eta: 0:10:47 lr: 0.000002 grad: 0.1572 (0.1547) loss: 0.8015 (0.8111) time: 0.1235 data: 0.0395 max mem: 9377 +Train: [92] [2100/6250] eta: 0:10:31 lr: 0.000002 grad: 0.1529 (0.1549) loss: 0.7973 (0.8109) time: 0.1543 data: 0.0721 max mem: 9377 +Train: [92] [2200/6250] eta: 0:10:13 lr: 0.000002 grad: 0.1498 (0.1548) loss: 0.8059 (0.8107) time: 0.1469 data: 0.0626 max mem: 9377 +Train: [92] [2300/6250] eta: 0:09:56 lr: 0.000002 grad: 0.1494 (0.1548) loss: 0.8030 (0.8106) time: 0.1405 data: 0.0622 max mem: 9377 +Train: [92] [2400/6250] eta: 0:09:41 lr: 0.000002 grad: 0.1416 (0.1547) loss: 0.8066 (0.8104) time: 0.1438 data: 0.0590 max mem: 9377 +Train: [92] [2500/6250] eta: 0:09:26 lr: 0.000002 grad: 0.1527 (0.1548) loss: 0.7943 (0.8101) time: 0.1539 data: 0.0759 max mem: 9377 +Train: [92] [2600/6250] eta: 0:09:10 lr: 0.000002 grad: 0.1474 (0.1549) loss: 0.8103 (0.8099) time: 0.1559 data: 0.0622 max mem: 9377 +Train: [92] [2700/6250] eta: 0:08:54 lr: 0.000002 grad: 0.1390 (0.1548) loss: 0.8004 (0.8097) time: 0.1358 data: 0.0474 max mem: 9377 +Train: [92] [2800/6250] eta: 0:08:39 lr: 0.000002 grad: 0.1519 (0.1549) loss: 0.8036 (0.8095) time: 0.1201 data: 0.0433 max mem: 9377 +Train: [92] [2900/6250] eta: 0:08:24 lr: 0.000002 grad: 0.1550 (0.1549) loss: 0.8006 (0.8093) time: 0.1356 data: 0.0518 max mem: 9377 +Train: [92] [3000/6250] eta: 0:08:08 lr: 0.000002 grad: 0.1514 (0.1547) loss: 0.8058 (0.8093) time: 0.1294 data: 0.0446 max mem: 9377 +Train: [92] [3100/6250] eta: 0:07:52 lr: 0.000002 grad: 0.1497 (0.1547) loss: 0.8068 (0.8092) time: 0.1553 data: 0.0699 max mem: 9377 +Train: [92] [3200/6250] eta: 0:07:37 lr: 0.000002 grad: 0.1543 (0.1547) loss: 0.8083 (0.8091) time: 0.1406 data: 0.0563 max mem: 9377 +Train: [92] [3300/6250] eta: 0:07:21 lr: 0.000002 grad: 0.1523 (0.1546) loss: 0.8092 (0.8091) time: 0.1353 data: 0.0551 max mem: 9377 +Train: [92] [3400/6250] eta: 0:07:05 lr: 0.000002 grad: 0.1459 (0.1544) loss: 0.7987 (0.8090) time: 0.1342 data: 0.0504 max mem: 9377 +Train: [92] [3500/6250] eta: 0:06:52 lr: 0.000002 grad: 0.1497 (0.1543) loss: 0.8068 (0.8090) time: 0.1524 data: 0.0632 max mem: 9377 +Train: [92] [3600/6250] eta: 0:06:39 lr: 0.000002 grad: 0.1450 (0.1542) loss: 0.8102 (0.8089) time: 0.1705 data: 0.0886 max mem: 9377 +Train: [92] [3700/6250] eta: 0:06:25 lr: 0.000002 grad: 0.1461 (0.1542) loss: 0.8106 (0.8089) time: 0.1672 data: 0.0848 max mem: 9377 +Train: [92] [3800/6250] eta: 0:06:11 lr: 0.000002 grad: 0.1464 (0.1541) loss: 0.8140 (0.8089) time: 0.1636 data: 0.0794 max mem: 9377 +Train: [92] [3900/6250] eta: 0:05:56 lr: 0.000002 grad: 0.1507 (0.1541) loss: 0.8034 (0.8088) time: 0.1572 data: 0.0747 max mem: 9377 +Train: [92] [4000/6250] eta: 0:05:41 lr: 0.000002 grad: 0.1410 (0.1540) loss: 0.8137 (0.8088) time: 0.1508 data: 0.0652 max mem: 9377 +Train: [92] [4100/6250] eta: 0:05:27 lr: 0.000002 grad: 0.1483 (0.1539) loss: 0.8113 (0.8088) time: 0.1566 data: 0.0727 max mem: 9377 +Train: [92] [4200/6250] eta: 0:05:12 lr: 0.000002 grad: 0.1419 (0.1538) loss: 0.8164 (0.8089) time: 0.1395 data: 0.0495 max mem: 9377 +Train: [92] [4300/6250] eta: 0:04:56 lr: 0.000002 grad: 0.1566 (0.1538) loss: 0.8042 (0.8089) time: 0.1485 data: 0.0688 max mem: 9377 +Train: [92] [4400/6250] eta: 0:04:40 lr: 0.000002 grad: 0.1463 (0.1538) loss: 0.8092 (0.8089) time: 0.1318 data: 0.0488 max mem: 9377 +Train: [92] [4500/6250] eta: 0:04:25 lr: 0.000002 grad: 0.1470 (0.1537) loss: 0.8126 (0.8090) time: 0.1551 data: 0.0750 max mem: 9377 +Train: [92] [4600/6250] eta: 0:04:10 lr: 0.000002 grad: 0.1465 (0.1537) loss: 0.8145 (0.8090) time: 0.1625 data: 0.0808 max mem: 9377 +Train: [92] [4700/6250] eta: 0:03:55 lr: 0.000002 grad: 0.1609 (0.1537) loss: 0.8063 (0.8090) time: 0.1938 data: 0.1177 max mem: 9377 +Train: [92] [4800/6250] eta: 0:03:41 lr: 0.000002 grad: 0.1480 (0.1537) loss: 0.8082 (0.8090) time: 0.1599 data: 0.0837 max mem: 9377 +Train: [92] [4900/6250] eta: 0:03:26 lr: 0.000002 grad: 0.1481 (0.1537) loss: 0.8113 (0.8090) time: 0.1604 data: 0.0785 max mem: 9377 +Train: [92] [5000/6250] eta: 0:03:11 lr: 0.000002 grad: 0.1450 (0.1536) loss: 0.8047 (0.8090) time: 0.1427 data: 0.0628 max mem: 9377 +Train: [92] [5100/6250] eta: 0:02:56 lr: 0.000002 grad: 0.1505 (0.1536) loss: 0.8089 (0.8090) time: 0.1816 data: 0.1048 max mem: 9377 +Train: [92] [5200/6250] eta: 0:02:41 lr: 0.000002 grad: 0.1448 (0.1535) loss: 0.8126 (0.8090) time: 0.1563 data: 0.0775 max mem: 9377 +Train: [92] [5300/6250] eta: 0:02:25 lr: 0.000002 grad: 0.1473 (0.1535) loss: 0.8080 (0.8089) time: 0.1672 data: 0.0840 max mem: 9377 +Train: [92] [5400/6250] eta: 0:02:10 lr: 0.000002 grad: 0.1524 (0.1535) loss: 0.8042 (0.8089) time: 0.1424 data: 0.0595 max mem: 9377 +Train: [92] [5500/6250] eta: 0:01:54 lr: 0.000002 grad: 0.1497 (0.1536) loss: 0.8005 (0.8089) time: 0.1569 data: 0.0819 max mem: 9377 +Train: [92] [5600/6250] eta: 0:01:39 lr: 0.000002 grad: 0.1484 (0.1536) loss: 0.8080 (0.8088) time: 0.1543 data: 0.0749 max mem: 9377 +Train: [92] [5700/6250] eta: 0:01:24 lr: 0.000002 grad: 0.1491 (0.1536) loss: 0.8063 (0.8088) time: 0.1626 data: 0.0807 max mem: 9377 +Train: [92] [5800/6250] eta: 0:01:09 lr: 0.000002 grad: 0.1558 (0.1536) loss: 0.8052 (0.8088) time: 0.1589 data: 0.0765 max mem: 9377 +Train: [92] [5900/6250] eta: 0:00:53 lr: 0.000002 grad: 0.1573 (0.1536) loss: 0.8037 (0.8088) time: 0.1659 data: 0.0864 max mem: 9377 +Train: [92] [6000/6250] eta: 0:00:38 lr: 0.000002 grad: 0.1490 (0.1537) loss: 0.8009 (0.8087) time: 0.1648 data: 0.0789 max mem: 9377 +Train: [92] [6100/6250] eta: 0:00:23 lr: 0.000002 grad: 0.1516 (0.1537) loss: 0.8071 (0.8086) time: 0.1290 data: 0.0513 max mem: 9377 +Train: [92] [6200/6250] eta: 0:00:07 lr: 0.000002 grad: 0.1531 (0.1537) loss: 0.8029 (0.8086) time: 0.1507 data: 0.0449 max mem: 9377 +Train: [92] [6249/6250] eta: 0:00:00 lr: 0.000002 grad: 0.1435 (0.1537) loss: 0.8093 (0.8086) time: 0.1526 data: 0.0653 max mem: 9377 +Train: [92] Total time: 0:16:03 (0.1542 s / it) +Averaged stats: lr: 0.000002 grad: 0.1435 (0.1537) loss: 0.8093 (0.8086) +Eval (hcp-train-subset): [92] [ 0/62] eta: 0:04:47 loss: 0.8173 (0.8173) time: 4.6304 data: 4.5524 max mem: 9377 +Eval (hcp-train-subset): [92] [61/62] eta: 0:00:00 loss: 0.8070 (0.8083) time: 0.1271 data: 0.1016 max mem: 9377 +Eval (hcp-train-subset): [92] Total time: 0:00:13 (0.2208 s / it) +Averaged stats (hcp-train-subset): loss: 0.8070 (0.8083) +Eval (hcp-val): [92] [ 0/62] eta: 0:05:21 loss: 0.8248 (0.8248) time: 5.1933 data: 5.1632 max mem: 9377 +Eval (hcp-val): [92] [61/62] eta: 0:00:00 loss: 0.8288 (0.8294) time: 0.1249 data: 0.0995 max mem: 9377 +Eval (hcp-val): [92] Total time: 0:00:13 (0.2199 s / it) +Averaged stats (hcp-val): loss: 0.8288 (0.8294) +Eval (nsd-val): [92] [ 0/62] eta: 0:05:34 loss: 0.8061 (0.8061) time: 5.3932 data: 5.3595 max mem: 9377 +Eval (nsd-val): [92] [61/62] eta: 0:00:00 loss: 0.8114 (0.8125) time: 0.1087 data: 0.0819 max mem: 9377 +Eval (nsd-val): [92] Total time: 0:00:12 (0.2062 s / it) +Averaged stats (nsd-val): loss: 0.8114 (0.8125) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +Train: [93] [ 0/6250] eta: 8:28:19 lr: 0.000002 grad: 0.1876 (0.1876) loss: 0.8353 (0.8353) time: 4.8799 data: 4.6074 max mem: 9377 +Train: [93] [ 100/6250] eta: 0:21:10 lr: 0.000002 grad: 0.1514 (0.2241) loss: 0.8120 (0.8135) time: 0.1716 data: 0.0653 max mem: 9377 +Train: [93] [ 200/6250] eta: 0:17:47 lr: 0.000002 grad: 0.1607 (0.2064) loss: 0.8073 (0.8087) time: 0.1516 data: 0.0536 max mem: 9377 +Train: [93] [ 300/6250] eta: 0:16:16 lr: 0.000002 grad: 0.1686 (0.1957) loss: 0.8039 (0.8084) time: 0.1197 data: 0.0250 max mem: 9377 +Train: [93] [ 400/6250] eta: 0:15:28 lr: 0.000002 grad: 0.1419 (0.1884) loss: 0.8166 (0.8085) time: 0.1307 data: 0.0275 max mem: 9377 +Train: [93] [ 500/6250] eta: 0:14:56 lr: 0.000002 grad: 0.1528 (0.1827) loss: 0.8192 (0.8089) time: 0.1724 data: 0.0817 max mem: 9377 +Train: [93] [ 600/6250] eta: 0:14:31 lr: 0.000002 grad: 0.1448 (0.1776) loss: 0.8200 (0.8100) time: 0.1429 data: 0.0516 max mem: 9377 +Train: [93] [ 700/6250] eta: 0:14:07 lr: 0.000002 grad: 0.1483 (0.1741) loss: 0.8155 (0.8108) time: 0.1366 data: 0.0499 max mem: 9377 +Train: [93] [ 800/6250] eta: 0:13:52 lr: 0.000002 grad: 0.1380 (0.1710) loss: 0.8136 (0.8112) time: 0.1594 data: 0.0705 max mem: 9377 +Train: [93] [ 900/6250] eta: 0:13:59 lr: 0.000002 grad: 0.1384 (0.1684) loss: 0.8205 (0.8118) time: 0.1197 data: 0.0182 max mem: 9377 +Train: [93] [1000/6250] eta: 0:13:46 lr: 0.000002 grad: 0.1456 (0.1663) loss: 0.8133 (0.8121) time: 0.1574 data: 0.0670 max mem: 9377 +Train: [93] [1100/6250] eta: 0:13:26 lr: 0.000002 grad: 0.1408 (0.1646) loss: 0.8124 (0.8123) time: 0.1519 data: 0.0669 max mem: 9377 +Train: [93] [1200/6250] eta: 0:13:08 lr: 0.000002 grad: 0.1385 (0.1629) loss: 0.8186 (0.8125) time: 0.1481 data: 0.0652 max mem: 9377 +Train: [93] [1300/6250] eta: 0:12:46 lr: 0.000002 grad: 0.1459 (0.1618) loss: 0.8115 (0.8126) time: 0.1455 data: 0.0489 max mem: 9377 +Train: [93] [1400/6250] eta: 0:12:27 lr: 0.000002 grad: 0.1435 (0.1611) loss: 0.8086 (0.8124) time: 0.1576 data: 0.0661 max mem: 9377 +Train: [93] [1500/6250] eta: 0:12:09 lr: 0.000002 grad: 0.1472 (0.1605) loss: 0.8086 (0.8122) time: 0.1356 data: 0.0512 max mem: 9377 +Train: [93] [1600/6250] eta: 0:11:55 lr: 0.000002 grad: 0.1479 (0.1602) loss: 0.8087 (0.8119) time: 0.1294 data: 0.0505 max mem: 9377 +Train: [93] [1700/6250] eta: 0:11:38 lr: 0.000002 grad: 0.1487 (0.1597) loss: 0.8158 (0.8116) time: 0.1436 data: 0.0525 max mem: 9377 +Train: [93] [1800/6250] eta: 0:11:21 lr: 0.000002 grad: 0.1518 (0.1591) loss: 0.8096 (0.8114) time: 0.1465 data: 0.0638 max mem: 9377 +Train: [93] [1900/6250] eta: 0:11:01 lr: 0.000002 grad: 0.1504 (0.1589) loss: 0.8062 (0.8112) time: 0.1149 data: 0.0260 max mem: 9377 +Train: [93] [2000/6250] eta: 0:10:44 lr: 0.000002 grad: 0.1628 (0.1588) loss: 0.8025 (0.8109) time: 0.1437 data: 0.0646 max mem: 9377 +Train: [93] [2100/6250] eta: 0:10:28 lr: 0.000002 grad: 0.1422 (0.1587) loss: 0.8093 (0.8108) time: 0.1574 data: 0.0764 max mem: 9377 +Train: [93] [2200/6250] eta: 0:10:12 lr: 0.000002 grad: 0.1457 (0.1586) loss: 0.8073 (0.8107) time: 0.1586 data: 0.0779 max mem: 9377 +Train: [93] [2300/6250] eta: 0:09:56 lr: 0.000001 grad: 0.1656 (0.1586) loss: 0.8045 (0.8105) time: 0.1172 data: 0.0341 max mem: 9377 +Train: [93] [2400/6250] eta: 0:09:40 lr: 0.000001 grad: 0.1464 (0.1584) loss: 0.8089 (0.8104) time: 0.1394 data: 0.0577 max mem: 9377 +Train: [93] [2500/6250] eta: 0:09:24 lr: 0.000001 grad: 0.1528 (0.1582) loss: 0.8061 (0.8104) time: 0.1286 data: 0.0434 max mem: 9377 +Train: [93] [2600/6250] eta: 0:09:08 lr: 0.000001 grad: 0.1547 (0.1582) loss: 0.8047 (0.8104) time: 0.1582 data: 0.0764 max mem: 9377 +Train: [93] [2700/6250] eta: 0:08:52 lr: 0.000001 grad: 0.1527 (0.1582) loss: 0.8111 (0.8103) time: 0.1513 data: 0.0710 max mem: 9377 +Train: [93] [2800/6250] eta: 0:08:37 lr: 0.000001 grad: 0.1447 (0.1582) loss: 0.8117 (0.8102) time: 0.1436 data: 0.0592 max mem: 9377 +Train: [93] [2900/6250] eta: 0:08:22 lr: 0.000001 grad: 0.1519 (0.1582) loss: 0.8077 (0.8102) time: 0.1380 data: 0.0566 max mem: 9377 +Train: [93] [3000/6250] eta: 0:08:06 lr: 0.000001 grad: 0.1495 (0.1582) loss: 0.8072 (0.8101) time: 0.1287 data: 0.0491 max mem: 9377 +Train: [93] [3100/6250] eta: 0:07:51 lr: 0.000001 grad: 0.1527 (0.1582) loss: 0.8057 (0.8100) time: 0.1322 data: 0.0458 max mem: 9377 +Train: [93] [3200/6250] eta: 0:07:37 lr: 0.000001 grad: 0.1452 (0.1581) loss: 0.8140 (0.8100) time: 0.1766 data: 0.0706 max mem: 9377 +Train: [93] [3300/6250] eta: 0:07:24 lr: 0.000001 grad: 0.1571 (0.1581) loss: 0.8040 (0.8098) time: 0.1673 data: 0.0868 max mem: 9377 +Train: [93] [3400/6250] eta: 0:07:11 lr: 0.000001 grad: 0.1496 (0.1580) loss: 0.8077 (0.8098) time: 0.2006 data: 0.1164 max mem: 9377 +Train: [93] [3500/6250] eta: 0:06:55 lr: 0.000001 grad: 0.1436 (0.1579) loss: 0.8150 (0.8098) time: 0.1458 data: 0.0627 max mem: 9377 +Train: [93] [3600/6250] eta: 0:06:40 lr: 0.000001 grad: 0.1422 (0.1578) loss: 0.8077 (0.8098) time: 0.1283 data: 0.0450 max mem: 9377 +Train: [93] [3700/6250] eta: 0:06:24 lr: 0.000001 grad: 0.1595 (0.1579) loss: 0.7993 (0.8098) time: 0.1418 data: 0.0591 max mem: 9377 +Train: [93] [3800/6250] eta: 0:06:09 lr: 0.000001 grad: 0.1441 (0.1578) loss: 0.8104 (0.8097) time: 0.1502 data: 0.0659 max mem: 9377 +Train: [93] [3900/6250] eta: 0:05:53 lr: 0.000001 grad: 0.1487 (0.1578) loss: 0.8127 (0.8096) time: 0.1339 data: 0.0511 max mem: 9377 +Train: [93] [4000/6250] eta: 0:05:37 lr: 0.000001 grad: 0.1595 (0.1578) loss: 0.7994 (0.8096) time: 0.1342 data: 0.0441 max mem: 9377 +Train: [93] [4100/6250] eta: 0:05:22 lr: 0.000001 grad: 0.1526 (0.1579) loss: 0.8075 (0.8095) time: 0.1628 data: 0.0715 max mem: 9377 +Train: [93] [4200/6250] eta: 0:05:06 lr: 0.000001 grad: 0.1545 (0.1579) loss: 0.8100 (0.8095) time: 0.1528 data: 0.0702 max mem: 9377 +Train: [93] [4300/6250] eta: 0:04:51 lr: 0.000001 grad: 0.1505 (0.1579) loss: 0.8028 (0.8094) time: 0.1273 data: 0.0438 max mem: 9377 +Train: [93] [4400/6250] eta: 0:04:36 lr: 0.000001 grad: 0.1497 (0.1579) loss: 0.8079 (0.8094) time: 0.1322 data: 0.0476 max mem: 9377 +Train: [93] [4500/6250] eta: 0:04:20 lr: 0.000001 grad: 0.1429 (0.1578) loss: 0.8151 (0.8094) time: 0.1331 data: 0.0489 max mem: 9377 +Train: [93] [4600/6250] eta: 0:04:05 lr: 0.000001 grad: 0.1473 (0.1578) loss: 0.8113 (0.8094) time: 0.1296 data: 0.0460 max mem: 9377 +Train: [93] [4700/6250] eta: 0:03:50 lr: 0.000001 grad: 0.1392 (0.1576) loss: 0.8092 (0.8094) time: 0.1191 data: 0.0352 max mem: 9377 +Train: [93] [4800/6250] eta: 0:03:35 lr: 0.000001 grad: 0.1543 (0.1576) loss: 0.8116 (0.8094) time: 0.1354 data: 0.0504 max mem: 9377 +Train: [93] [4900/6250] eta: 0:03:20 lr: 0.000001 grad: 0.1522 (0.1576) loss: 0.8091 (0.8094) time: 0.1372 data: 0.0548 max mem: 9377 +Train: [93] [5000/6250] eta: 0:03:05 lr: 0.000001 grad: 0.1559 (0.1575) loss: 0.8042 (0.8094) time: 0.1471 data: 0.0646 max mem: 9377 +Train: [93] [5100/6250] eta: 0:02:50 lr: 0.000001 grad: 0.1447 (0.1574) loss: 0.8103 (0.8093) time: 0.1358 data: 0.0577 max mem: 9377 +Train: [93] [5200/6250] eta: 0:02:34 lr: 0.000001 grad: 0.1462 (0.1573) loss: 0.8049 (0.8093) time: 0.1111 data: 0.0252 max mem: 9377 +Train: [93] [5300/6250] eta: 0:02:20 lr: 0.000001 grad: 0.1456 (0.1573) loss: 0.8101 (0.8092) time: 0.1909 data: 0.1122 max mem: 9377 +Train: [93] [5400/6250] eta: 0:02:05 lr: 0.000001 grad: 0.1634 (0.1574) loss: 0.8064 (0.8092) time: 0.1378 data: 0.0533 max mem: 9377 +Train: [93] [5500/6250] eta: 0:01:51 lr: 0.000001 grad: 0.1453 (0.1573) loss: 0.8117 (0.8092) time: 0.1488 data: 0.0697 max mem: 9377 +Train: [93] [5600/6250] eta: 0:01:36 lr: 0.000001 grad: 0.1580 (0.1572) loss: 0.8015 (0.8092) time: 0.1370 data: 0.0554 max mem: 9377 +Train: [93] [5700/6250] eta: 0:01:21 lr: 0.000001 grad: 0.1525 (0.1572) loss: 0.8069 (0.8092) time: 0.1344 data: 0.0534 max mem: 9377 +Train: [93] [5800/6250] eta: 0:01:06 lr: 0.000001 grad: 0.1377 (0.1571) loss: 0.8156 (0.8092) time: 0.1586 data: 0.0781 max mem: 9377 +Train: [93] [5900/6250] eta: 0:00:51 lr: 0.000001 grad: 0.1472 (0.1569) loss: 0.8071 (0.8093) time: 0.1421 data: 0.0616 max mem: 9377 +Train: [93] [6000/6250] eta: 0:00:37 lr: 0.000001 grad: 0.1444 (0.1569) loss: 0.8066 (0.8092) time: 0.1470 data: 0.0599 max mem: 9377 +Train: [93] [6100/6250] eta: 0:00:22 lr: 0.000001 grad: 0.1451 (0.1569) loss: 0.8131 (0.8092) time: 0.1378 data: 0.0505 max mem: 9377 +Train: [93] [6200/6250] eta: 0:00:07 lr: 0.000001 grad: 0.1562 (0.1569) loss: 0.7997 (0.8092) time: 0.1352 data: 0.0411 max mem: 9377 +Train: [93] [6249/6250] eta: 0:00:00 lr: 0.000001 grad: 0.1459 (0.1569) loss: 0.8122 (0.8092) time: 0.1394 data: 0.0526 max mem: 9377 +Train: [93] Total time: 0:15:29 (0.1487 s / it) +Averaged stats: lr: 0.000001 grad: 0.1459 (0.1569) loss: 0.8122 (0.8092) +Eval (hcp-train-subset): [93] [ 0/62] eta: 0:04:26 loss: 0.8154 (0.8154) time: 4.2929 data: 4.2158 max mem: 9377 +Eval (hcp-train-subset): [93] [61/62] eta: 0:00:00 loss: 0.8072 (0.8074) time: 0.1131 data: 0.0860 max mem: 9377 +Eval (hcp-train-subset): [93] Total time: 0:00:13 (0.2175 s / it) +Averaged stats (hcp-train-subset): loss: 0.8072 (0.8074) +Eval (hcp-val): [93] [ 0/62] eta: 0:03:56 loss: 0.8267 (0.8267) time: 3.8118 data: 3.7575 max mem: 9377 +Eval (hcp-val): [93] [61/62] eta: 0:00:00 loss: 0.8271 (0.8291) time: 0.1348 data: 0.1093 max mem: 9377 +Eval (hcp-val): [93] Total time: 0:00:14 (0.2387 s / it) +Averaged stats (hcp-val): loss: 0.8271 (0.8291) +Eval (nsd-val): [93] [ 0/62] eta: 0:07:16 loss: 0.7989 (0.7989) time: 7.0439 data: 7.0119 max mem: 9377 +Eval (nsd-val): [93] [61/62] eta: 0:00:00 loss: 0.8080 (0.8117) time: 0.1310 data: 0.1055 max mem: 9377 +Eval (nsd-val): [93] Total time: 0:00:14 (0.2395 s / it) +Averaged stats (nsd-val): loss: 0.8080 (0.8117) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +Train: [94] [ 0/6250] eta: 7:52:37 lr: 0.000001 grad: 0.1003 (0.1003) loss: 0.8557 (0.8557) time: 4.5373 data: 4.3062 max mem: 9377 +Train: [94] [ 100/6250] eta: 0:21:00 lr: 0.000001 grad: 0.1463 (0.2076) loss: 0.8272 (0.8198) time: 0.1651 data: 0.0641 max mem: 9377 +Train: [94] [ 200/6250] eta: 0:17:43 lr: 0.000001 grad: 0.1513 (0.1798) loss: 0.8226 (0.8198) time: 0.1534 data: 0.0524 max mem: 9377 +Train: [94] [ 300/6250] eta: 0:16:23 lr: 0.000001 grad: 0.1569 (0.1718) loss: 0.8196 (0.8197) time: 0.1470 data: 0.0408 max mem: 9377 +Train: [94] [ 400/6250] eta: 0:15:38 lr: 0.000001 grad: 0.1397 (0.1661) loss: 0.8259 (0.8200) time: 0.1472 data: 0.0484 max mem: 9377 +Train: [94] [ 500/6250] eta: 0:15:13 lr: 0.000001 grad: 0.1588 (0.1628) loss: 0.8139 (0.8197) time: 0.1475 data: 0.0578 max mem: 9377 +Train: [94] [ 600/6250] eta: 0:14:43 lr: 0.000001 grad: 0.1583 (0.1618) loss: 0.8119 (0.8185) time: 0.1404 data: 0.0483 max mem: 9377 +Train: [94] [ 700/6250] eta: 0:14:22 lr: 0.000001 grad: 0.1551 (0.1616) loss: 0.8198 (0.8175) time: 0.1314 data: 0.0387 max mem: 9377 +Train: [94] [ 800/6250] eta: 0:14:05 lr: 0.000001 grad: 0.1509 (0.1613) loss: 0.8113 (0.8166) time: 0.1390 data: 0.0424 max mem: 9377 +Train: [94] [ 900/6250] eta: 0:13:45 lr: 0.000001 grad: 0.1478 (0.1616) loss: 0.8183 (0.8156) time: 0.1535 data: 0.0582 max mem: 9377 +Train: [94] [1000/6250] eta: 0:13:35 lr: 0.000001 grad: 0.1490 (0.1616) loss: 0.8076 (0.8147) time: 0.1481 data: 0.0517 max mem: 9377 +Train: [94] [1100/6250] eta: 0:13:14 lr: 0.000001 grad: 0.1416 (0.1613) loss: 0.8128 (0.8140) time: 0.1568 data: 0.0736 max mem: 9377 +Train: [94] [1200/6250] eta: 0:13:05 lr: 0.000001 grad: 0.1616 (0.1610) loss: 0.8075 (0.8134) time: 0.1617 data: 0.0640 max mem: 9377 +Train: [94] [1300/6250] eta: 0:12:59 lr: 0.000001 grad: 0.1487 (0.1606) loss: 0.8068 (0.8130) time: 0.1981 data: 0.1157 max mem: 9377 +Train: [94] [1400/6250] eta: 0:12:41 lr: 0.000001 grad: 0.1506 (0.1603) loss: 0.8091 (0.8126) time: 0.1438 data: 0.0626 max mem: 9377 +Train: [94] [1500/6250] eta: 0:12:21 lr: 0.000001 grad: 0.1474 (0.1602) loss: 0.8056 (0.8122) time: 0.1534 data: 0.0759 max mem: 9377 +Train: [94] [1600/6250] eta: 0:12:01 lr: 0.000001 grad: 0.1576 (0.1601) loss: 0.8079 (0.8119) time: 0.1406 data: 0.0599 max mem: 9377 +Train: [94] [1700/6250] eta: 0:11:42 lr: 0.000001 grad: 0.1533 (0.1600) loss: 0.8127 (0.8116) time: 0.1438 data: 0.0636 max mem: 9377 +Train: [94] [1800/6250] eta: 0:11:23 lr: 0.000001 grad: 0.1602 (0.1601) loss: 0.8011 (0.8111) time: 0.1455 data: 0.0638 max mem: 9377 +Train: [94] [1900/6250] eta: 0:11:05 lr: 0.000001 grad: 0.1517 (0.1601) loss: 0.8035 (0.8107) time: 0.1340 data: 0.0474 max mem: 9377 +Train: [94] [2000/6250] eta: 0:10:46 lr: 0.000001 grad: 0.1571 (0.1600) loss: 0.8005 (0.8105) time: 0.1646 data: 0.0833 max mem: 9377 +Train: [94] [2100/6250] eta: 0:10:29 lr: 0.000001 grad: 0.1456 (0.1597) loss: 0.8076 (0.8104) time: 0.1304 data: 0.0464 max mem: 9377 +Train: [94] [2200/6250] eta: 0:10:12 lr: 0.000001 grad: 0.1440 (0.1592) loss: 0.8091 (0.8102) time: 0.1458 data: 0.0669 max mem: 9377 +Train: [94] [2300/6250] eta: 0:09:55 lr: 0.000001 grad: 0.1589 (0.1589) loss: 0.8048 (0.8101) time: 0.1512 data: 0.0661 max mem: 9377 +Train: [94] [2400/6250] eta: 0:09:37 lr: 0.000001 grad: 0.1468 (0.1588) loss: 0.7990 (0.8100) time: 0.1113 data: 0.0257 max mem: 9377 +Train: [94] [2500/6250] eta: 0:09:19 lr: 0.000001 grad: 0.1348 (0.1584) loss: 0.8163 (0.8100) time: 0.1417 data: 0.0586 max mem: 9377 +Train: [94] [2600/6250] eta: 0:09:03 lr: 0.000001 grad: 0.1401 (0.1580) loss: 0.8101 (0.8100) time: 0.1240 data: 0.0355 max mem: 9377 +Train: [94] [2700/6250] eta: 0:08:47 lr: 0.000001 grad: 0.1346 (0.1577) loss: 0.8132 (0.8100) time: 0.1452 data: 0.0618 max mem: 9377 +Train: [94] [2800/6250] eta: 0:08:33 lr: 0.000001 grad: 0.1492 (0.1572) loss: 0.8098 (0.8101) time: 0.1795 data: 0.0827 max mem: 9377 +Train: [94] [2900/6250] eta: 0:08:20 lr: 0.000001 grad: 0.1550 (0.1569) loss: 0.8012 (0.8102) time: 0.2177 data: 0.1273 max mem: 9377 +Train: [94] [3000/6250] eta: 0:08:07 lr: 0.000001 grad: 0.1560 (0.1568) loss: 0.8074 (0.8101) time: 0.1591 data: 0.0762 max mem: 9377 +Train: [94] [3100/6250] eta: 0:07:53 lr: 0.000001 grad: 0.1487 (0.1566) loss: 0.8109 (0.8101) time: 0.1662 data: 0.0852 max mem: 9377 +Train: [94] [3200/6250] eta: 0:07:38 lr: 0.000001 grad: 0.1476 (0.1565) loss: 0.8156 (0.8101) time: 0.1365 data: 0.0568 max mem: 9377 +Train: [94] [3300/6250] eta: 0:07:24 lr: 0.000001 grad: 0.1410 (0.1562) loss: 0.8076 (0.8102) time: 0.1538 data: 0.0675 max mem: 9377 +Train: [94] [3400/6250] eta: 0:07:08 lr: 0.000001 grad: 0.1540 (0.1561) loss: 0.8074 (0.8102) time: 0.1364 data: 0.0554 max mem: 9377 +Train: [94] [3500/6250] eta: 0:06:52 lr: 0.000001 grad: 0.1538 (0.1559) loss: 0.8123 (0.8102) time: 0.1180 data: 0.0347 max mem: 9377 +Train: [94] [3600/6250] eta: 0:06:37 lr: 0.000001 grad: 0.1568 (0.1558) loss: 0.8091 (0.8102) time: 0.1534 data: 0.0722 max mem: 9377 +Train: [94] [3700/6250] eta: 0:06:21 lr: 0.000001 grad: 0.1465 (0.1557) loss: 0.8122 (0.8102) time: 0.1402 data: 0.0549 max mem: 9377 +Train: [94] [3800/6250] eta: 0:06:05 lr: 0.000001 grad: 0.1365 (0.1556) loss: 0.8149 (0.8103) time: 0.1186 data: 0.0245 max mem: 9377 +Train: [94] [3900/6250] eta: 0:05:49 lr: 0.000001 grad: 0.1465 (0.1554) loss: 0.8148 (0.8104) time: 0.1275 data: 0.0367 max mem: 9377 +Train: [94] [4000/6250] eta: 0:05:34 lr: 0.000001 grad: 0.1503 (0.1553) loss: 0.8118 (0.8104) time: 0.1536 data: 0.0702 max mem: 9377 +Train: [94] [4100/6250] eta: 0:05:18 lr: 0.000001 grad: 0.1470 (0.1552) loss: 0.8123 (0.8104) time: 0.1582 data: 0.0726 max mem: 9377 +Train: [94] [4200/6250] eta: 0:05:03 lr: 0.000001 grad: 0.1382 (0.1550) loss: 0.8094 (0.8105) time: 0.1502 data: 0.0694 max mem: 9377 +Train: [94] [4300/6250] eta: 0:04:48 lr: 0.000001 grad: 0.1512 (0.1549) loss: 0.8054 (0.8105) time: 0.1384 data: 0.0538 max mem: 9377 +Train: [94] [4400/6250] eta: 0:04:33 lr: 0.000001 grad: 0.1411 (0.1548) loss: 0.8161 (0.8105) time: 0.1535 data: 0.0776 max mem: 9377 +Train: [94] [4500/6250] eta: 0:04:18 lr: 0.000001 grad: 0.1494 (0.1547) loss: 0.8116 (0.8105) time: 0.1479 data: 0.0660 max mem: 9377 +Train: [94] [4600/6250] eta: 0:04:03 lr: 0.000001 grad: 0.1518 (0.1546) loss: 0.8073 (0.8104) time: 0.1350 data: 0.0527 max mem: 9377 +Train: [94] [4700/6250] eta: 0:03:49 lr: 0.000001 grad: 0.1448 (0.1546) loss: 0.8148 (0.8105) time: 0.1537 data: 0.0616 max mem: 9377 +Train: [94] [4800/6250] eta: 0:03:34 lr: 0.000001 grad: 0.1496 (0.1546) loss: 0.8062 (0.8104) time: 0.1381 data: 0.0534 max mem: 9377 +Train: [94] [4900/6250] eta: 0:03:19 lr: 0.000001 grad: 0.1488 (0.1547) loss: 0.8083 (0.8103) time: 0.1601 data: 0.0730 max mem: 9377 +Train: [94] [5000/6250] eta: 0:03:05 lr: 0.000001 grad: 0.1496 (0.1546) loss: 0.8093 (0.8103) time: 0.1101 data: 0.0190 max mem: 9377 +Train: [94] [5100/6250] eta: 0:02:50 lr: 0.000001 grad: 0.1437 (0.1546) loss: 0.8143 (0.8103) time: 0.1835 data: 0.0922 max mem: 9377 +Train: [94] [5200/6250] eta: 0:02:36 lr: 0.000001 grad: 0.1517 (0.1545) loss: 0.8132 (0.8103) time: 0.1573 data: 0.0780 max mem: 9377 +Train: [94] [5300/6250] eta: 0:02:21 lr: 0.000001 grad: 0.1479 (0.1545) loss: 0.8113 (0.8103) time: 0.1589 data: 0.0766 max mem: 9377 +Train: [94] [5400/6250] eta: 0:02:06 lr: 0.000001 grad: 0.1368 (0.1545) loss: 0.8120 (0.8103) time: 0.1501 data: 0.0629 max mem: 9377 +Train: [94] [5500/6250] eta: 0:01:52 lr: 0.000001 grad: 0.1453 (0.1544) loss: 0.8162 (0.8103) time: 0.1588 data: 0.0799 max mem: 9377 +Train: [94] [5600/6250] eta: 0:01:37 lr: 0.000001 grad: 0.1609 (0.1544) loss: 0.8076 (0.8103) time: 0.1263 data: 0.0340 max mem: 9377 +Train: [94] [5700/6250] eta: 0:01:22 lr: 0.000001 grad: 0.1467 (0.1543) loss: 0.8119 (0.8104) time: 0.1200 data: 0.0312 max mem: 9377 +Train: [94] [5800/6250] eta: 0:01:07 lr: 0.000001 grad: 0.1519 (0.1543) loss: 0.8045 (0.8104) time: 0.1426 data: 0.0619 max mem: 9377 +Train: [94] [5900/6250] eta: 0:00:52 lr: 0.000001 grad: 0.1413 (0.1542) loss: 0.8100 (0.8104) time: 0.1369 data: 0.0545 max mem: 9377 +Train: [94] [6000/6250] eta: 0:00:37 lr: 0.000001 grad: 0.1415 (0.1541) loss: 0.8130 (0.8104) time: 0.1572 data: 0.0620 max mem: 9377 +Train: [94] [6100/6250] eta: 0:00:22 lr: 0.000001 grad: 0.1443 (0.1540) loss: 0.8057 (0.8104) time: 0.1366 data: 0.0530 max mem: 9377 +Train: [94] [6200/6250] eta: 0:00:07 lr: 0.000001 grad: 0.1574 (0.1540) loss: 0.8056 (0.8103) time: 0.1373 data: 0.0502 max mem: 9377 +Train: [94] [6249/6250] eta: 0:00:00 lr: 0.000001 grad: 0.1484 (0.1540) loss: 0.8069 (0.8103) time: 0.1513 data: 0.0671 max mem: 9377 +Train: [94] Total time: 0:15:34 (0.1496 s / it) +Averaged stats: lr: 0.000001 grad: 0.1484 (0.1540) loss: 0.8069 (0.8103) +Eval (hcp-train-subset): [94] [ 0/62] eta: 0:06:19 loss: 0.8158 (0.8158) time: 6.1176 data: 6.0842 max mem: 9377 +Eval (hcp-train-subset): [94] [61/62] eta: 0:00:00 loss: 0.8058 (0.8075) time: 0.1129 data: 0.0876 max mem: 9377 +Eval (hcp-train-subset): [94] Total time: 0:00:13 (0.2228 s / it) +Averaged stats (hcp-train-subset): loss: 0.8058 (0.8075) +Making plots (hcp-train-subset): example=18 +Eval (hcp-val): [94] [ 0/62] eta: 0:03:42 loss: 0.8229 (0.8229) time: 3.5830 data: 3.5043 max mem: 9377 +Eval (hcp-val): [94] [61/62] eta: 0:00:00 loss: 0.8292 (0.8293) time: 0.1382 data: 0.1114 max mem: 9377 +Eval (hcp-val): [94] Total time: 0:00:13 (0.2180 s / it) +Averaged stats (hcp-val): loss: 0.8292 (0.8293) +Making plots (hcp-val): example=43 +Eval (nsd-val): [94] [ 0/62] eta: 0:03:49 loss: 0.8035 (0.8035) time: 3.7085 data: 3.6318 max mem: 9377 +Eval (nsd-val): [94] [61/62] eta: 0:00:00 loss: 0.8099 (0.8119) time: 0.0952 data: 0.0703 max mem: 9377 +Eval (nsd-val): [94] Total time: 0:00:13 (0.2103 s / it) +Averaged stats (nsd-val): loss: 0.8099 (0.8119) +Making plots (nsd-val): example=42 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-00094.pth +Train: [95] [ 0/6250] eta: 9:32:09 lr: 0.000001 grad: 0.1732 (0.1732) loss: 0.8154 (0.8154) time: 5.4927 data: 5.3101 max mem: 9377 +Train: [95] [ 100/6250] eta: 0:21:28 lr: 0.000001 grad: 0.1826 (0.1869) loss: 0.7953 (0.8091) time: 0.1528 data: 0.0425 max mem: 9377 +Train: [95] [ 200/6250] eta: 0:18:44 lr: 0.000001 grad: 0.1637 (0.1860) loss: 0.8110 (0.8046) time: 0.1570 data: 0.0459 max mem: 9377 +Train: [95] [ 300/6250] eta: 0:17:20 lr: 0.000001 grad: 0.1622 (0.1797) loss: 0.8016 (0.8058) time: 0.1447 data: 0.0519 max mem: 9377 +Train: [95] [ 400/6250] eta: 0:16:15 lr: 0.000001 grad: 0.1311 (0.1734) loss: 0.8217 (0.8080) time: 0.1489 data: 0.0577 max mem: 9377 +Train: [95] [ 500/6250] eta: 0:15:34 lr: 0.000001 grad: 0.1411 (0.1685) loss: 0.8208 (0.8100) time: 0.1446 data: 0.0571 max mem: 9377 +Train: [95] [ 600/6250] eta: 0:15:15 lr: 0.000001 grad: 0.1404 (0.1659) loss: 0.8128 (0.8101) time: 0.1735 data: 0.0878 max mem: 9377 +Train: [95] [ 700/6250] eta: 0:15:12 lr: 0.000001 grad: 0.1456 (0.1647) loss: 0.8148 (0.8101) time: 0.2179 data: 0.1270 max mem: 9377 +Train: [95] [ 800/6250] eta: 0:15:01 lr: 0.000001 grad: 0.1572 (0.1639) loss: 0.8156 (0.8103) time: 0.1993 data: 0.0970 max mem: 9377 +Train: [95] [ 900/6250] eta: 0:15:01 lr: 0.000001 grad: 0.1477 (0.1629) loss: 0.8115 (0.8104) time: 0.2427 data: 0.1473 max mem: 9377 +Train: [95] [1000/6250] eta: 0:14:49 lr: 0.000001 grad: 0.1487 (0.1623) loss: 0.8172 (0.8106) time: 0.1992 data: 0.1122 max mem: 9377 +Train: [95] [1100/6250] eta: 0:14:22 lr: 0.000001 grad: 0.1542 (0.1617) loss: 0.8111 (0.8106) time: 0.1730 data: 0.0918 max mem: 9377 +Train: [95] [1200/6250] eta: 0:13:53 lr: 0.000001 grad: 0.1473 (0.1610) loss: 0.8158 (0.8107) time: 0.1689 data: 0.0761 max mem: 9377 +Train: [95] [1300/6250] eta: 0:13:30 lr: 0.000001 grad: 0.1504 (0.1606) loss: 0.8170 (0.8108) time: 0.1160 data: 0.0364 max mem: 9377 +Train: [95] [1400/6250] eta: 0:13:07 lr: 0.000001 grad: 0.1568 (0.1603) loss: 0.8079 (0.8108) time: 0.1416 data: 0.0557 max mem: 9377 +Train: [95] [1500/6250] eta: 0:12:46 lr: 0.000001 grad: 0.1453 (0.1595) loss: 0.8104 (0.8110) time: 0.1513 data: 0.0676 max mem: 9377 +Train: [95] [1600/6250] eta: 0:12:26 lr: 0.000001 grad: 0.1305 (0.1589) loss: 0.8185 (0.8111) time: 0.1512 data: 0.0592 max mem: 9377 +Train: [95] [1700/6250] eta: 0:12:05 lr: 0.000001 grad: 0.1432 (0.1581) loss: 0.8189 (0.8114) time: 0.1526 data: 0.0619 max mem: 9377 +Train: [95] [1800/6250] eta: 0:11:45 lr: 0.000001 grad: 0.1430 (0.1574) loss: 0.8151 (0.8117) time: 0.1200 data: 0.0333 max mem: 9377 +Train: [95] [1900/6250] eta: 0:11:26 lr: 0.000001 grad: 0.1513 (0.1569) loss: 0.8110 (0.8119) time: 0.1620 data: 0.0776 max mem: 9377 +Train: [95] [2000/6250] eta: 0:11:06 lr: 0.000001 grad: 0.1361 (0.1564) loss: 0.8204 (0.8120) time: 0.1336 data: 0.0462 max mem: 9377 +Train: [95] [2100/6250] eta: 0:10:47 lr: 0.000001 grad: 0.1440 (0.1560) loss: 0.8104 (0.8122) time: 0.1452 data: 0.0640 max mem: 9377 +Train: [95] [2200/6250] eta: 0:10:29 lr: 0.000001 grad: 0.1477 (0.1554) loss: 0.8129 (0.8123) time: 0.1519 data: 0.0695 max mem: 9377 +Train: [95] [2300/6250] eta: 0:10:11 lr: 0.000001 grad: 0.1452 (0.1549) loss: 0.8115 (0.8124) time: 0.1387 data: 0.0488 max mem: 9377 +Train: [95] [2400/6250] eta: 0:09:54 lr: 0.000001 grad: 0.1360 (0.1545) loss: 0.8212 (0.8126) time: 0.1479 data: 0.0647 max mem: 9377 +Train: [95] [2500/6250] eta: 0:09:38 lr: 0.000001 grad: 0.1426 (0.1541) loss: 0.8106 (0.8126) time: 0.1478 data: 0.0642 max mem: 9377 +Train: [95] [2600/6250] eta: 0:09:25 lr: 0.000001 grad: 0.1503 (0.1539) loss: 0.8144 (0.8127) time: 0.1563 data: 0.0743 max mem: 9377 +Train: [95] [2700/6250] eta: 0:09:08 lr: 0.000001 grad: 0.1422 (0.1538) loss: 0.8180 (0.8127) time: 0.1430 data: 0.0565 max mem: 9377 +Train: [95] [2800/6250] eta: 0:08:52 lr: 0.000001 grad: 0.1451 (0.1537) loss: 0.8146 (0.8127) time: 0.1391 data: 0.0598 max mem: 9377 +Train: [95] [2900/6250] eta: 0:08:35 lr: 0.000001 grad: 0.1474 (0.1535) loss: 0.8145 (0.8128) time: 0.1356 data: 0.0484 max mem: 9377 +Train: [95] [3000/6250] eta: 0:08:19 lr: 0.000001 grad: 0.1424 (0.1534) loss: 0.8189 (0.8129) time: 0.1319 data: 0.0489 max mem: 9377 +Train: [95] [3100/6250] eta: 0:08:04 lr: 0.000001 grad: 0.1393 (0.1532) loss: 0.8198 (0.8130) time: 0.1436 data: 0.0522 max mem: 9377 +Train: [95] [3200/6250] eta: 0:07:49 lr: 0.000001 grad: 0.1505 (0.1531) loss: 0.8191 (0.8131) time: 0.1652 data: 0.0742 max mem: 9377 +Train: [95] [3300/6250] eta: 0:07:34 lr: 0.000001 grad: 0.1467 (0.1529) loss: 0.8176 (0.8132) time: 0.1620 data: 0.0676 max mem: 9377 +Train: [95] [3400/6250] eta: 0:07:18 lr: 0.000001 grad: 0.1449 (0.1531) loss: 0.8157 (0.8133) time: 0.1524 data: 0.0712 max mem: 9377 +Train: [95] [3500/6250] eta: 0:07:01 lr: 0.000001 grad: 0.1479 (0.1529) loss: 0.8196 (0.8134) time: 0.1521 data: 0.0735 max mem: 9377 +Train: [95] [3600/6250] eta: 0:06:45 lr: 0.000001 grad: 0.1343 (0.1528) loss: 0.8238 (0.8135) time: 0.1291 data: 0.0455 max mem: 9377 +Train: [95] [3700/6250] eta: 0:06:30 lr: 0.000001 grad: 0.1396 (0.1527) loss: 0.8182 (0.8135) time: 0.1652 data: 0.0857 max mem: 9377 +Train: [95] [3800/6250] eta: 0:06:14 lr: 0.000001 grad: 0.1454 (0.1526) loss: 0.8195 (0.8136) time: 0.1485 data: 0.0629 max mem: 9377 +Train: [95] [3900/6250] eta: 0:05:58 lr: 0.000001 grad: 0.1485 (0.1525) loss: 0.8161 (0.8137) time: 0.1432 data: 0.0662 max mem: 9377 +Train: [95] [4000/6250] eta: 0:05:43 lr: 0.000001 grad: 0.1417 (0.1524) loss: 0.8160 (0.8138) time: 0.1188 data: 0.0356 max mem: 9377 +Train: [95] [4100/6250] eta: 0:05:29 lr: 0.000001 grad: 0.1419 (0.1523) loss: 0.8189 (0.8138) time: 0.2695 data: 0.1833 max mem: 9377 +Train: [95] [4200/6250] eta: 0:05:13 lr: 0.000001 grad: 0.1580 (0.1523) loss: 0.8071 (0.8138) time: 0.1057 data: 0.0131 max mem: 9377 +Train: [95] [4300/6250] eta: 0:04:57 lr: 0.000001 grad: 0.1458 (0.1522) loss: 0.8177 (0.8139) time: 0.1397 data: 0.0616 max mem: 9377 +Train: [95] [4400/6250] eta: 0:04:42 lr: 0.000001 grad: 0.1511 (0.1523) loss: 0.8086 (0.8138) time: 0.1576 data: 0.0747 max mem: 9377 +Train: [95] [4500/6250] eta: 0:04:27 lr: 0.000001 grad: 0.1491 (0.1523) loss: 0.8116 (0.8138) time: 0.1358 data: 0.0505 max mem: 9377 +Train: [95] [4600/6250] eta: 0:04:12 lr: 0.000001 grad: 0.1539 (0.1523) loss: 0.8089 (0.8138) time: 0.1581 data: 0.0627 max mem: 9377 +Train: [95] [4700/6250] eta: 0:03:57 lr: 0.000001 grad: 0.1539 (0.1524) loss: 0.8096 (0.8137) time: 0.1555 data: 0.0733 max mem: 9377 +Train: [95] [4800/6250] eta: 0:03:42 lr: 0.000001 grad: 0.1563 (0.1526) loss: 0.8063 (0.8136) time: 0.1381 data: 0.0535 max mem: 9377 +Train: [95] [4900/6250] eta: 0:03:27 lr: 0.000001 grad: 0.1555 (0.1528) loss: 0.8112 (0.8135) time: 0.1561 data: 0.0732 max mem: 9377 +Train: [95] [5000/6250] eta: 0:03:12 lr: 0.000001 grad: 0.1555 (0.1529) loss: 0.8106 (0.8134) time: 0.1385 data: 0.0527 max mem: 9377 +Train: [95] [5100/6250] eta: 0:02:57 lr: 0.000001 grad: 0.1551 (0.1530) loss: 0.8040 (0.8133) time: 0.1778 data: 0.0924 max mem: 9377 +Train: [95] [5200/6250] eta: 0:02:41 lr: 0.000001 grad: 0.1613 (0.1532) loss: 0.8080 (0.8132) time: 0.1587 data: 0.0771 max mem: 9377 +Train: [95] [5300/6250] eta: 0:02:26 lr: 0.000001 grad: 0.1532 (0.1534) loss: 0.8049 (0.8131) time: 0.1255 data: 0.0311 max mem: 9377 +Train: [95] [5400/6250] eta: 0:02:10 lr: 0.000001 grad: 0.1453 (0.1534) loss: 0.8094 (0.8130) time: 0.1393 data: 0.0497 max mem: 9377 +Train: [95] [5500/6250] eta: 0:01:55 lr: 0.000001 grad: 0.1473 (0.1535) loss: 0.8157 (0.8130) time: 0.1468 data: 0.0498 max mem: 9377 +Train: [95] [5600/6250] eta: 0:01:39 lr: 0.000001 grad: 0.1601 (0.1535) loss: 0.8040 (0.8130) time: 0.1681 data: 0.0940 max mem: 9377 +Train: [95] [5700/6250] eta: 0:01:24 lr: 0.000001 grad: 0.1534 (0.1536) loss: 0.8094 (0.8129) time: 0.1492 data: 0.0623 max mem: 9377 +Train: [95] [5800/6250] eta: 0:01:09 lr: 0.000001 grad: 0.1514 (0.1536) loss: 0.8102 (0.8129) time: 0.1333 data: 0.0247 max mem: 9377 +Train: [95] [5900/6250] eta: 0:00:53 lr: 0.000001 grad: 0.1508 (0.1537) loss: 0.8077 (0.8129) time: 0.1486 data: 0.0566 max mem: 9377 +Train: [95] [6000/6250] eta: 0:00:38 lr: 0.000001 grad: 0.1565 (0.1537) loss: 0.8116 (0.8128) time: 0.1598 data: 0.0795 max mem: 9377 +Train: [95] [6100/6250] eta: 0:00:23 lr: 0.000001 grad: 0.1570 (0.1538) loss: 0.8094 (0.8128) time: 0.1076 data: 0.0003 max mem: 9377 +Train: [95] [6200/6250] eta: 0:00:07 lr: 0.000001 grad: 0.1552 (0.1538) loss: 0.8068 (0.8128) time: 0.1376 data: 0.0533 max mem: 9377 +Train: [95] [6249/6250] eta: 0:00:00 lr: 0.000001 grad: 0.1534 (0.1539) loss: 0.8096 (0.8128) time: 0.1498 data: 0.0618 max mem: 9377 +Train: [95] Total time: 0:16:07 (0.1548 s / it) +Averaged stats: lr: 0.000001 grad: 0.1534 (0.1539) loss: 0.8096 (0.8128) +Eval (hcp-train-subset): [95] [ 0/62] eta: 0:06:31 loss: 0.8154 (0.8154) time: 6.3204 data: 6.2887 max mem: 9377 +Eval (hcp-train-subset): [95] [61/62] eta: 0:00:00 loss: 0.8044 (0.8069) time: 0.1137 data: 0.0868 max mem: 9377 +Eval (hcp-train-subset): [95] Total time: 0:00:14 (0.2396 s / it) +Averaged stats (hcp-train-subset): loss: 0.8044 (0.8069) +Eval (hcp-val): [95] [ 0/62] eta: 0:05:17 loss: 0.8222 (0.8222) time: 5.1288 data: 5.0985 max mem: 9377 +Eval (hcp-val): [95] [61/62] eta: 0:00:00 loss: 0.8270 (0.8283) time: 0.1180 data: 0.0910 max mem: 9377 +Eval (hcp-val): [95] Total time: 0:00:13 (0.2238 s / it) +Averaged stats (hcp-val): loss: 0.8270 (0.8283) +Eval (nsd-val): [95] [ 0/62] eta: 0:05:08 loss: 0.8052 (0.8052) time: 4.9733 data: 4.9074 max mem: 9377 +Eval (nsd-val): [95] [61/62] eta: 0:00:00 loss: 0.8116 (0.8130) time: 0.1250 data: 0.0993 max mem: 9377 +Eval (nsd-val): [95] Total time: 0:00:13 (0.2217 s / it) +Averaged stats (nsd-val): loss: 0.8116 (0.8130) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-best.pth +Train: [96] [ 0/6250] eta: 9:45:26 lr: 0.000001 grad: 0.4240 (0.4240) loss: 0.7623 (0.7623) time: 5.6203 data: 5.3048 max mem: 9377 +Train: [96] [ 100/6250] eta: 0:20:35 lr: 0.000001 grad: 0.1764 (0.1783) loss: 0.8127 (0.8158) time: 0.1479 data: 0.0377 max mem: 9377 +Train: [96] [ 200/6250] eta: 0:17:53 lr: 0.000001 grad: 0.1656 (0.1808) loss: 0.7995 (0.8095) time: 0.1704 data: 0.0679 max mem: 9377 +Train: [96] [ 300/6250] eta: 0:16:31 lr: 0.000001 grad: 0.1650 (0.1817) loss: 0.8033 (0.8050) time: 0.1445 data: 0.0393 max mem: 9377 +Train: [96] [ 400/6250] eta: 0:15:41 lr: 0.000001 grad: 0.1544 (0.1786) loss: 0.8113 (0.8044) time: 0.1475 data: 0.0482 max mem: 9377 +Train: [96] [ 500/6250] eta: 0:15:03 lr: 0.000001 grad: 0.1451 (0.1760) loss: 0.8125 (0.8048) time: 0.1635 data: 0.0702 max mem: 9377 +Train: [96] [ 600/6250] eta: 0:14:35 lr: 0.000001 grad: 0.1538 (0.1741) loss: 0.8087 (0.8055) time: 0.1564 data: 0.0717 max mem: 9377 +Train: [96] [ 700/6250] eta: 0:14:14 lr: 0.000001 grad: 0.1498 (0.1719) loss: 0.8129 (0.8062) time: 0.1154 data: 0.0224 max mem: 9377 +Train: [96] [ 800/6250] eta: 0:14:03 lr: 0.000001 grad: 0.1609 (0.1704) loss: 0.8086 (0.8067) time: 0.1579 data: 0.0739 max mem: 9377 +Train: [96] [ 900/6250] eta: 0:13:46 lr: 0.000001 grad: 0.1636 (0.1692) loss: 0.8141 (0.8072) time: 0.1495 data: 0.0627 max mem: 9377 +Train: [96] [1000/6250] eta: 0:13:25 lr: 0.000001 grad: 0.1579 (0.1686) loss: 0.8063 (0.8073) time: 0.1534 data: 0.0671 max mem: 9377 +Train: [96] [1100/6250] eta: 0:13:02 lr: 0.000000 grad: 0.1582 (0.1683) loss: 0.8109 (0.8074) time: 0.1359 data: 0.0512 max mem: 9377 +Train: [96] [1200/6250] eta: 0:12:41 lr: 0.000000 grad: 0.1521 (0.1677) loss: 0.8180 (0.8077) time: 0.1234 data: 0.0363 max mem: 9377 +Train: [96] [1300/6250] eta: 0:12:24 lr: 0.000000 grad: 0.1568 (0.1668) loss: 0.8119 (0.8080) time: 0.1376 data: 0.0596 max mem: 9377 +Train: [96] [1400/6250] eta: 0:12:07 lr: 0.000000 grad: 0.1519 (0.1662) loss: 0.8065 (0.8080) time: 0.1531 data: 0.0691 max mem: 9377 +Train: [96] [1500/6250] eta: 0:11:53 lr: 0.000000 grad: 0.1540 (0.1657) loss: 0.8080 (0.8081) time: 0.1407 data: 0.0456 max mem: 9377 +Train: [96] [1600/6250] eta: 0:11:37 lr: 0.000000 grad: 0.1512 (0.1654) loss: 0.8078 (0.8080) time: 0.1465 data: 0.0636 max mem: 9377 +Train: [96] [1700/6250] eta: 0:11:21 lr: 0.000000 grad: 0.1612 (0.1649) loss: 0.8098 (0.8079) time: 0.1587 data: 0.0732 max mem: 9377 +Train: [96] [1800/6250] eta: 0:11:05 lr: 0.000000 grad: 0.1517 (0.1646) loss: 0.8085 (0.8077) time: 0.1366 data: 0.0526 max mem: 9377 +Train: [96] [1900/6250] eta: 0:10:49 lr: 0.000000 grad: 0.1544 (0.1643) loss: 0.8056 (0.8076) time: 0.1326 data: 0.0431 max mem: 9377 +Train: [96] [2000/6250] eta: 0:10:31 lr: 0.000000 grad: 0.1539 (0.1642) loss: 0.8080 (0.8075) time: 0.1274 data: 0.0368 max mem: 9377 +Train: [96] [2100/6250] eta: 0:10:17 lr: 0.000000 grad: 0.1593 (0.1639) loss: 0.8093 (0.8075) time: 0.1448 data: 0.0665 max mem: 9377 +Train: [96] [2200/6250] eta: 0:10:02 lr: 0.000000 grad: 0.1547 (0.1636) loss: 0.8099 (0.8076) time: 0.1705 data: 0.0858 max mem: 9377 +Train: [96] [2300/6250] eta: 0:09:53 lr: 0.000000 grad: 0.1456 (0.1630) loss: 0.8142 (0.8078) time: 0.1511 data: 0.0702 max mem: 9377 +Train: [96] [2400/6250] eta: 0:09:38 lr: 0.000000 grad: 0.1437 (0.1626) loss: 0.8140 (0.8079) time: 0.1517 data: 0.0674 max mem: 9377 +Train: [96] [2500/6250] eta: 0:09:23 lr: 0.000000 grad: 0.1511 (0.1623) loss: 0.8081 (0.8081) time: 0.1478 data: 0.0652 max mem: 9377 +Train: [96] [2600/6250] eta: 0:09:07 lr: 0.000000 grad: 0.1429 (0.1620) loss: 0.8121 (0.8082) time: 0.1281 data: 0.0498 max mem: 9377 +Train: [96] [2700/6250] eta: 0:08:51 lr: 0.000000 grad: 0.1474 (0.1618) loss: 0.8077 (0.8082) time: 0.1494 data: 0.0682 max mem: 9377 +Train: [96] [2800/6250] eta: 0:08:35 lr: 0.000000 grad: 0.1533 (0.1614) loss: 0.8075 (0.8083) time: 0.1389 data: 0.0596 max mem: 9377 +Train: [96] [2900/6250] eta: 0:08:19 lr: 0.000000 grad: 0.1528 (0.1612) loss: 0.8083 (0.8083) time: 0.1364 data: 0.0529 max mem: 9377 +Train: [96] [3000/6250] eta: 0:08:04 lr: 0.000000 grad: 0.1472 (0.1609) loss: 0.8098 (0.8084) time: 0.1410 data: 0.0609 max mem: 9377 +Train: [96] [3100/6250] eta: 0:07:47 lr: 0.000000 grad: 0.1409 (0.1606) loss: 0.8096 (0.8085) time: 0.1192 data: 0.0389 max mem: 9377 +Train: [96] [3200/6250] eta: 0:07:31 lr: 0.000000 grad: 0.1517 (0.1607) loss: 0.8076 (0.8085) time: 0.1334 data: 0.0459 max mem: 9377 +Train: [96] [3300/6250] eta: 0:07:15 lr: 0.000000 grad: 0.1484 (0.1606) loss: 0.8117 (0.8084) time: 0.1321 data: 0.0467 max mem: 9377 +Train: [96] [3400/6250] eta: 0:06:59 lr: 0.000000 grad: 0.1514 (0.1607) loss: 0.8022 (0.8084) time: 0.1397 data: 0.0593 max mem: 9377 +Train: [96] [3500/6250] eta: 0:06:44 lr: 0.000000 grad: 0.1525 (0.1609) loss: 0.8068 (0.8083) time: 0.1313 data: 0.0509 max mem: 9377 +Train: [96] [3600/6250] eta: 0:06:29 lr: 0.000000 grad: 0.1513 (0.1608) loss: 0.8068 (0.8083) time: 0.1428 data: 0.0651 max mem: 9377 +Train: [96] [3700/6250] eta: 0:06:14 lr: 0.000000 grad: 0.1617 (0.1608) loss: 0.8087 (0.8083) time: 0.1462 data: 0.0568 max mem: 9377 +Train: [96] [3800/6250] eta: 0:06:00 lr: 0.000000 grad: 0.1563 (0.1608) loss: 0.8026 (0.8083) time: 0.1534 data: 0.0597 max mem: 9377 +Train: [96] [3900/6250] eta: 0:05:44 lr: 0.000000 grad: 0.1623 (0.1609) loss: 0.8038 (0.8082) time: 0.1372 data: 0.0535 max mem: 9377 +Train: [96] [4000/6250] eta: 0:05:30 lr: 0.000000 grad: 0.1605 (0.1610) loss: 0.8034 (0.8081) time: 0.1394 data: 0.0590 max mem: 9377 +Train: [96] [4100/6250] eta: 0:05:15 lr: 0.000000 grad: 0.1491 (0.1608) loss: 0.8167 (0.8083) time: 0.1455 data: 0.0651 max mem: 9377 +Train: [96] [4200/6250] eta: 0:05:00 lr: 0.000000 grad: 0.1574 (0.1606) loss: 0.8048 (0.8084) time: 0.1397 data: 0.0503 max mem: 9377 +Train: [96] [4300/6250] eta: 0:04:45 lr: 0.000000 grad: 0.1442 (0.1604) loss: 0.8089 (0.8085) time: 0.1493 data: 0.0648 max mem: 9377 +Train: [96] [4400/6250] eta: 0:04:30 lr: 0.000000 grad: 0.1504 (0.1603) loss: 0.8061 (0.8085) time: 0.1225 data: 0.0441 max mem: 9377 +Train: [96] [4500/6250] eta: 0:04:15 lr: 0.000000 grad: 0.1394 (0.1601) loss: 0.8128 (0.8086) time: 0.1548 data: 0.0733 max mem: 9377 +Train: [96] [4600/6250] eta: 0:04:00 lr: 0.000000 grad: 0.1522 (0.1599) loss: 0.8143 (0.8087) time: 0.1348 data: 0.0579 max mem: 9377 +Train: [96] [4700/6250] eta: 0:03:45 lr: 0.000000 grad: 0.1520 (0.1597) loss: 0.8177 (0.8088) time: 0.1309 data: 0.0472 max mem: 9377 +Train: [96] [4800/6250] eta: 0:03:31 lr: 0.000000 grad: 0.1448 (0.1596) loss: 0.8140 (0.8089) time: 0.1261 data: 0.0514 max mem: 9377 +Train: [96] [4900/6250] eta: 0:03:16 lr: 0.000000 grad: 0.1434 (0.1594) loss: 0.8121 (0.8090) time: 0.1396 data: 0.0633 max mem: 9377 +Train: [96] [5000/6250] eta: 0:03:01 lr: 0.000000 grad: 0.1573 (0.1593) loss: 0.8147 (0.8091) time: 0.1213 data: 0.0485 max mem: 9377 +Train: [96] [5100/6250] eta: 0:02:46 lr: 0.000000 grad: 0.1522 (0.1591) loss: 0.8102 (0.8093) time: 0.1326 data: 0.0549 max mem: 9377 +Train: [96] [5200/6250] eta: 0:02:31 lr: 0.000000 grad: 0.1477 (0.1590) loss: 0.8127 (0.8093) time: 0.1397 data: 0.0618 max mem: 9377 +Train: [96] [5300/6250] eta: 0:02:17 lr: 0.000000 grad: 0.1490 (0.1589) loss: 0.8196 (0.8095) time: 0.1219 data: 0.0487 max mem: 9377 +Train: [96] [5400/6250] eta: 0:02:02 lr: 0.000000 grad: 0.1533 (0.1588) loss: 0.8093 (0.8095) time: 0.1295 data: 0.0583 max mem: 9377 +Train: [96] [5500/6250] eta: 0:01:48 lr: 0.000000 grad: 0.1414 (0.1587) loss: 0.8197 (0.8096) time: 0.1307 data: 0.0546 max mem: 9377 +Train: [96] [5600/6250] eta: 0:01:33 lr: 0.000000 grad: 0.1592 (0.1586) loss: 0.8120 (0.8097) time: 0.1374 data: 0.0616 max mem: 9377 +Train: [96] [5700/6250] eta: 0:01:19 lr: 0.000000 grad: 0.1602 (0.1587) loss: 0.8104 (0.8097) time: 0.1450 data: 0.0691 max mem: 9377 +Train: [96] [5800/6250] eta: 0:01:04 lr: 0.000000 grad: 0.1575 (0.1587) loss: 0.8057 (0.8097) time: 0.1174 data: 0.0393 max mem: 9377 +Train: [96] [5900/6250] eta: 0:00:50 lr: 0.000000 grad: 0.1458 (0.1588) loss: 0.8113 (0.8097) time: 0.1194 data: 0.0420 max mem: 9377 +Train: [96] [6000/6250] eta: 0:00:35 lr: 0.000000 grad: 0.1552 (0.1589) loss: 0.8118 (0.8097) time: 0.1361 data: 0.0616 max mem: 9377 +Train: [96] [6100/6250] eta: 0:00:21 lr: 0.000000 grad: 0.1586 (0.1589) loss: 0.8061 (0.8097) time: 0.1265 data: 0.0526 max mem: 9377 +Train: [96] [6200/6250] eta: 0:00:07 lr: 0.000000 grad: 0.1509 (0.1590) loss: 0.8083 (0.8096) time: 0.1279 data: 0.0495 max mem: 9377 +Train: [96] [6249/6250] eta: 0:00:00 lr: 0.000000 grad: 0.1603 (0.1591) loss: 0.8073 (0.8096) time: 0.1353 data: 0.0628 max mem: 9377 +Train: [96] Total time: 0:14:57 (0.1436 s / it) +Averaged stats: lr: 0.000000 grad: 0.1603 (0.1591) loss: 0.8073 (0.8096) +Eval (hcp-train-subset): [96] [ 0/62] eta: 0:04:57 loss: 0.8156 (0.8156) time: 4.8055 data: 4.7769 max mem: 9377 +Eval (hcp-train-subset): [96] [61/62] eta: 0:00:00 loss: 0.8053 (0.8066) time: 0.1038 data: 0.0794 max mem: 9377 +Eval (hcp-train-subset): [96] Total time: 0:00:13 (0.2124 s / it) +Averaged stats (hcp-train-subset): loss: 0.8053 (0.8066) +Eval (hcp-val): [96] [ 0/62] eta: 0:04:26 loss: 0.8265 (0.8265) time: 4.2916 data: 4.2633 max mem: 9377 +Eval (hcp-val): [96] [61/62] eta: 0:00:00 loss: 0.8276 (0.8286) time: 0.1344 data: 0.1083 max mem: 9377 +Eval (hcp-val): [96] Total time: 0:00:12 (0.1963 s / it) +Averaged stats (hcp-val): loss: 0.8276 (0.8286) +Eval (nsd-val): [96] [ 0/62] eta: 0:04:28 loss: 0.7979 (0.7979) time: 4.3296 data: 4.3001 max mem: 9377 +Eval (nsd-val): [96] [61/62] eta: 0:00:00 loss: 0.8121 (0.8119) time: 0.1125 data: 0.0880 max mem: 9377 +Eval (nsd-val): [96] Total time: 0:00:11 (0.1924 s / it) +Averaged stats (nsd-val): loss: 0.8121 (0.8119) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +Train: [97] [ 0/6250] eta: 8:43:03 lr: 0.000000 grad: 0.5391 (0.5391) loss: 0.8085 (0.8085) time: 5.0213 data: 4.8838 max mem: 9377 +Train: [97] [ 100/6250] eta: 0:19:01 lr: 0.000000 grad: 0.1844 (0.2183) loss: 0.8193 (0.8083) time: 0.1361 data: 0.0461 max mem: 9377 +Train: [97] [ 200/6250] eta: 0:16:26 lr: 0.000000 grad: 0.1735 (0.2008) loss: 0.8165 (0.8104) time: 0.1472 data: 0.0605 max mem: 9377 +Train: [97] [ 300/6250] eta: 0:15:06 lr: 0.000000 grad: 0.1698 (0.1963) loss: 0.8121 (0.8101) time: 0.1329 data: 0.0468 max mem: 9377 +Train: [97] [ 400/6250] eta: 0:14:07 lr: 0.000000 grad: 0.1857 (0.1915) loss: 0.8031 (0.8100) time: 0.1256 data: 0.0445 max mem: 9377 +Train: [97] [ 500/6250] eta: 0:13:25 lr: 0.000000 grad: 0.1613 (0.1879) loss: 0.8111 (0.8100) time: 0.1398 data: 0.0586 max mem: 9377 +Train: [97] [ 600/6250] eta: 0:12:45 lr: 0.000000 grad: 0.1575 (0.1844) loss: 0.8071 (0.8102) time: 0.1212 data: 0.0404 max mem: 9377 +Train: [97] [ 700/6250] eta: 0:12:18 lr: 0.000000 grad: 0.1622 (0.1829) loss: 0.8052 (0.8099) time: 0.1155 data: 0.0364 max mem: 9377 +Train: [97] [ 800/6250] eta: 0:11:56 lr: 0.000000 grad: 0.1698 (0.1819) loss: 0.8113 (0.8096) time: 0.1214 data: 0.0343 max mem: 9377 +Train: [97] [ 900/6250] eta: 0:11:34 lr: 0.000000 grad: 0.1611 (0.1805) loss: 0.8079 (0.8095) time: 0.1125 data: 0.0334 max mem: 9377 +Train: [97] [1000/6250] eta: 0:11:13 lr: 0.000000 grad: 0.1660 (0.1796) loss: 0.8022 (0.8090) time: 0.1211 data: 0.0468 max mem: 9377 +Train: [97] [1100/6250] eta: 0:10:54 lr: 0.000000 grad: 0.1532 (0.1778) loss: 0.8079 (0.8088) time: 0.1090 data: 0.0347 max mem: 9377 +Train: [97] [1200/6250] eta: 0:10:38 lr: 0.000000 grad: 0.1575 (0.1764) loss: 0.8110 (0.8088) time: 0.1309 data: 0.0570 max mem: 9377 +Train: [97] [1300/6250] eta: 0:10:22 lr: 0.000000 grad: 0.1628 (0.1754) loss: 0.8034 (0.8086) time: 0.1206 data: 0.0391 max mem: 9377 +Train: [97] [1400/6250] eta: 0:10:09 lr: 0.000000 grad: 0.1554 (0.1744) loss: 0.8112 (0.8087) time: 0.1158 data: 0.0428 max mem: 9377 +Train: [97] [1500/6250] eta: 0:09:56 lr: 0.000000 grad: 0.1517 (0.1732) loss: 0.8075 (0.8089) time: 0.1278 data: 0.0561 max mem: 9377 +Train: [97] [1600/6250] eta: 0:09:44 lr: 0.000000 grad: 0.1468 (0.1721) loss: 0.8129 (0.8090) time: 0.1368 data: 0.0615 max mem: 9377 +Train: [97] [1700/6250] eta: 0:09:32 lr: 0.000000 grad: 0.1639 (0.1713) loss: 0.8150 (0.8092) time: 0.1248 data: 0.0503 max mem: 9377 +Train: [97] [1800/6250] eta: 0:09:19 lr: 0.000000 grad: 0.1507 (0.1702) loss: 0.8108 (0.8094) time: 0.1314 data: 0.0566 max mem: 9377 +Train: [97] [1900/6250] eta: 0:09:04 lr: 0.000000 grad: 0.1501 (0.1695) loss: 0.8138 (0.8095) time: 0.1173 data: 0.0344 max mem: 9377 +Train: [97] [2000/6250] eta: 0:08:50 lr: 0.000000 grad: 0.1427 (0.1687) loss: 0.8183 (0.8097) time: 0.1187 data: 0.0432 max mem: 9377 +Train: [97] [2100/6250] eta: 0:08:36 lr: 0.000000 grad: 0.1629 (0.1681) loss: 0.8106 (0.8098) time: 0.1146 data: 0.0403 max mem: 9377 +Train: [97] [2200/6250] eta: 0:08:22 lr: 0.000000 grad: 0.1451 (0.1675) loss: 0.8165 (0.8100) time: 0.1150 data: 0.0352 max mem: 9377 +Train: [97] [2300/6250] eta: 0:08:09 lr: 0.000000 grad: 0.1470 (0.1669) loss: 0.8211 (0.8102) time: 0.1180 data: 0.0430 max mem: 9377 +Train: [97] [2400/6250] eta: 0:07:55 lr: 0.000000 grad: 0.1515 (0.1663) loss: 0.8154 (0.8104) time: 0.1288 data: 0.0467 max mem: 9377 +Train: [97] [2500/6250] eta: 0:07:42 lr: 0.000000 grad: 0.1430 (0.1657) loss: 0.8178 (0.8106) time: 0.1245 data: 0.0516 max mem: 9377 +Train: [97] [2600/6250] eta: 0:07:30 lr: 0.000000 grad: 0.1395 (0.1651) loss: 0.8226 (0.8108) time: 0.1254 data: 0.0453 max mem: 9377 +Train: [97] [2700/6250] eta: 0:07:17 lr: 0.000000 grad: 0.1467 (0.1645) loss: 0.8144 (0.8110) time: 0.1176 data: 0.0414 max mem: 9377 +Train: [97] [2800/6250] eta: 0:07:05 lr: 0.000000 grad: 0.1412 (0.1638) loss: 0.8198 (0.8112) time: 0.1200 data: 0.0459 max mem: 9377 +Train: [97] [2900/6250] eta: 0:06:53 lr: 0.000000 grad: 0.1572 (0.1633) loss: 0.8127 (0.8113) time: 0.1156 data: 0.0436 max mem: 9377 +Train: [97] [3000/6250] eta: 0:06:40 lr: 0.000000 grad: 0.1399 (0.1629) loss: 0.8205 (0.8114) time: 0.1200 data: 0.0447 max mem: 9377 +Train: [97] [3100/6250] eta: 0:06:28 lr: 0.000000 grad: 0.1426 (0.1624) loss: 0.8161 (0.8115) time: 0.1265 data: 0.0480 max mem: 9377 +Train: [97] [3200/6250] eta: 0:06:15 lr: 0.000000 grad: 0.1491 (0.1620) loss: 0.8060 (0.8115) time: 0.1166 data: 0.0412 max mem: 9377 +Train: [97] [3300/6250] eta: 0:06:04 lr: 0.000000 grad: 0.1523 (0.1617) loss: 0.8140 (0.8116) time: 0.1768 data: 0.1059 max mem: 9377 +Train: [97] [3400/6250] eta: 0:05:50 lr: 0.000000 grad: 0.1497 (0.1616) loss: 0.8092 (0.8115) time: 0.1087 data: 0.0322 max mem: 9377 +Train: [97] [3500/6250] eta: 0:05:38 lr: 0.000000 grad: 0.1572 (0.1616) loss: 0.8122 (0.8114) time: 0.1187 data: 0.0371 max mem: 9377 +Train: [97] [3600/6250] eta: 0:05:25 lr: 0.000000 grad: 0.1514 (0.1614) loss: 0.8146 (0.8114) time: 0.1248 data: 0.0511 max mem: 9377 +Train: [97] [3700/6250] eta: 0:05:13 lr: 0.000000 grad: 0.1648 (0.1614) loss: 0.7993 (0.8113) time: 0.1055 data: 0.0296 max mem: 9377 +Train: [97] [3800/6250] eta: 0:05:00 lr: 0.000000 grad: 0.1654 (0.1614) loss: 0.8045 (0.8112) time: 0.1203 data: 0.0408 max mem: 9377 +Train: [97] [3900/6250] eta: 0:04:48 lr: 0.000000 grad: 0.1537 (0.1613) loss: 0.8083 (0.8111) time: 0.1283 data: 0.0494 max mem: 9377 +Train: [97] [4000/6250] eta: 0:04:35 lr: 0.000000 grad: 0.1605 (0.1613) loss: 0.8089 (0.8109) time: 0.1176 data: 0.0407 max mem: 9377 +Train: [97] [4100/6250] eta: 0:04:23 lr: 0.000000 grad: 0.1544 (0.1613) loss: 0.8103 (0.8108) time: 0.1344 data: 0.0611 max mem: 9377 +Train: [97] [4200/6250] eta: 0:04:11 lr: 0.000000 grad: 0.1674 (0.1614) loss: 0.8128 (0.8108) time: 0.1297 data: 0.0564 max mem: 9377 +Train: [97] [4300/6250] eta: 0:03:58 lr: 0.000000 grad: 0.1567 (0.1614) loss: 0.8020 (0.8107) time: 0.1184 data: 0.0418 max mem: 9377 +Train: [97] [4400/6250] eta: 0:03:46 lr: 0.000000 grad: 0.1539 (0.1614) loss: 0.8147 (0.8107) time: 0.1117 data: 0.0376 max mem: 9377 +Train: [97] [4500/6250] eta: 0:03:34 lr: 0.000000 grad: 0.1519 (0.1614) loss: 0.8115 (0.8107) time: 0.1298 data: 0.0553 max mem: 9377 +Train: [97] [4600/6250] eta: 0:03:22 lr: 0.000000 grad: 0.1535 (0.1614) loss: 0.8058 (0.8107) time: 0.1175 data: 0.0447 max mem: 9377 +Train: [97] [4700/6250] eta: 0:03:09 lr: 0.000000 grad: 0.1636 (0.1614) loss: 0.8140 (0.8106) time: 0.1066 data: 0.0279 max mem: 9377 +Train: [97] [4800/6250] eta: 0:02:57 lr: 0.000000 grad: 0.1428 (0.1613) loss: 0.8103 (0.8106) time: 0.1205 data: 0.0401 max mem: 9377 +Train: [97] [4900/6250] eta: 0:02:45 lr: 0.000000 grad: 0.1515 (0.1613) loss: 0.8099 (0.8105) time: 0.1192 data: 0.0330 max mem: 9377 +Train: [97] [5000/6250] eta: 0:02:33 lr: 0.000000 grad: 0.1523 (0.1614) loss: 0.8073 (0.8105) time: 0.1204 data: 0.0404 max mem: 9377 +Train: [97] [5100/6250] eta: 0:02:21 lr: 0.000000 grad: 0.1528 (0.1613) loss: 0.8048 (0.8104) time: 0.1298 data: 0.0572 max mem: 9377 +Train: [97] [5200/6250] eta: 0:02:08 lr: 0.000000 grad: 0.1532 (0.1613) loss: 0.8116 (0.8104) time: 0.1167 data: 0.0414 max mem: 9377 +Train: [97] [5300/6250] eta: 0:01:56 lr: 0.000000 grad: 0.1681 (0.1613) loss: 0.7971 (0.8103) time: 0.1207 data: 0.0419 max mem: 9377 +Train: [97] [5400/6250] eta: 0:01:44 lr: 0.000000 grad: 0.1540 (0.1612) loss: 0.8029 (0.8103) time: 0.1139 data: 0.0302 max mem: 9377 +Train: [97] [5500/6250] eta: 0:01:31 lr: 0.000000 grad: 0.1604 (0.1611) loss: 0.8078 (0.8103) time: 0.1069 data: 0.0269 max mem: 9377 +Train: [97] [5600/6250] eta: 0:01:19 lr: 0.000000 grad: 0.1459 (0.1610) loss: 0.8149 (0.8103) time: 0.1240 data: 0.0474 max mem: 9377 +Train: [97] [5700/6250] eta: 0:01:07 lr: 0.000000 grad: 0.1607 (0.1608) loss: 0.8099 (0.8103) time: 0.1141 data: 0.0329 max mem: 9377 +Train: [97] [5800/6250] eta: 0:00:55 lr: 0.000000 grad: 0.1555 (0.1607) loss: 0.8145 (0.8103) time: 0.1217 data: 0.0437 max mem: 9377 +Train: [97] [5900/6250] eta: 0:00:42 lr: 0.000000 grad: 0.1591 (0.1606) loss: 0.8094 (0.8103) time: 0.1263 data: 0.0535 max mem: 9377 +Train: [97] [6000/6250] eta: 0:00:30 lr: 0.000000 grad: 0.1479 (0.1605) loss: 0.8091 (0.8102) time: 0.1329 data: 0.0577 max mem: 9377 +Train: [97] [6100/6250] eta: 0:00:18 lr: 0.000000 grad: 0.1514 (0.1605) loss: 0.8069 (0.8102) time: 0.0980 data: 0.0187 max mem: 9377 +Train: [97] [6200/6250] eta: 0:00:06 lr: 0.000000 grad: 0.1458 (0.1604) loss: 0.8133 (0.8102) time: 0.1230 data: 0.0473 max mem: 9377 +Train: [97] [6249/6250] eta: 0:00:00 lr: 0.000000 grad: 0.1567 (0.1604) loss: 0.8094 (0.8102) time: 0.1167 data: 0.0394 max mem: 9377 +Train: [97] Total time: 0:12:48 (0.1229 s / it) +Averaged stats: lr: 0.000000 grad: 0.1567 (0.1604) loss: 0.8094 (0.8102) +Eval (hcp-train-subset): [97] [ 0/62] eta: 0:04:42 loss: 0.8176 (0.8176) time: 4.5563 data: 4.5283 max mem: 9377 +Eval (hcp-train-subset): [97] [61/62] eta: 0:00:00 loss: 0.8043 (0.8063) time: 0.1167 data: 0.0923 max mem: 9377 +Eval (hcp-train-subset): [97] Total time: 0:00:11 (0.1881 s / it) +Averaged stats (hcp-train-subset): loss: 0.8043 (0.8063) +Eval (hcp-val): [97] [ 0/62] eta: 0:03:23 loss: 0.8262 (0.8262) time: 3.2857 data: 3.2150 max mem: 9377 +Eval (hcp-val): [97] [61/62] eta: 0:00:00 loss: 0.8267 (0.8285) time: 0.0996 data: 0.0753 max mem: 9377 +Eval (hcp-val): [97] Total time: 0:00:11 (0.1881 s / it) +Averaged stats (hcp-val): loss: 0.8267 (0.8285) +Eval (nsd-val): [97] [ 0/62] eta: 0:04:41 loss: 0.7994 (0.7994) time: 4.5393 data: 4.5113 max mem: 9377 +Eval (nsd-val): [97] [61/62] eta: 0:00:00 loss: 0.8104 (0.8119) time: 0.1144 data: 0.0874 max mem: 9377 +Eval (nsd-val): [97] Total time: 0:00:11 (0.1862 s / it) +Averaged stats (nsd-val): loss: 0.8104 (0.8119) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +Train: [98] [ 0/6250] eta: 8:30:46 lr: 0.000000 grad: 0.1883 (0.1883) loss: 0.7064 (0.7064) time: 4.9035 data: 4.7609 max mem: 9377 +Train: [98] [ 100/6250] eta: 0:17:54 lr: 0.000000 grad: 0.1850 (0.1899) loss: 0.7975 (0.8011) time: 0.1421 data: 0.0583 max mem: 9377 +Train: [98] [ 200/6250] eta: 0:15:15 lr: 0.000000 grad: 0.1839 (0.1873) loss: 0.7972 (0.7984) time: 0.1451 data: 0.0604 max mem: 9377 +Train: [98] [ 300/6250] eta: 0:14:17 lr: 0.000000 grad: 0.1561 (0.1832) loss: 0.8135 (0.7999) time: 0.1191 data: 0.0223 max mem: 9377 +Train: [98] [ 400/6250] eta: 0:13:49 lr: 0.000000 grad: 0.1743 (0.1805) loss: 0.8001 (0.8009) time: 0.1301 data: 0.0457 max mem: 9377 +Train: [98] [ 500/6250] eta: 0:13:22 lr: 0.000000 grad: 0.1716 (0.1789) loss: 0.7989 (0.8017) time: 0.1197 data: 0.0310 max mem: 9377 +Train: [98] [ 600/6250] eta: 0:12:56 lr: 0.000000 grad: 0.1586 (0.1767) loss: 0.8049 (0.8021) time: 0.1283 data: 0.0412 max mem: 9377 +Train: [98] [ 700/6250] eta: 0:12:33 lr: 0.000000 grad: 0.1522 (0.1746) loss: 0.8098 (0.8031) time: 0.1166 data: 0.0331 max mem: 9377 +Train: [98] [ 800/6250] eta: 0:12:15 lr: 0.000000 grad: 0.1597 (0.1732) loss: 0.8077 (0.8038) time: 0.1427 data: 0.0585 max mem: 9377 +Train: [98] [ 900/6250] eta: 0:11:56 lr: 0.000000 grad: 0.1581 (0.1722) loss: 0.8057 (0.8043) time: 0.1226 data: 0.0443 max mem: 9377 +Train: [98] [1000/6250] eta: 0:11:35 lr: 0.000000 grad: 0.1592 (0.1709) loss: 0.8075 (0.8047) time: 0.1274 data: 0.0523 max mem: 9377 +Train: [98] [1100/6250] eta: 0:11:16 lr: 0.000000 grad: 0.1490 (0.1696) loss: 0.8182 (0.8052) time: 0.1064 data: 0.0283 max mem: 9377 +Train: [98] [1200/6250] eta: 0:10:59 lr: 0.000000 grad: 0.1622 (0.1694) loss: 0.8107 (0.8055) time: 0.1310 data: 0.0499 max mem: 9377 +Train: [98] [1300/6250] eta: 0:10:39 lr: 0.000000 grad: 0.1626 (0.1690) loss: 0.8117 (0.8056) time: 0.1060 data: 0.0289 max mem: 9377 +Train: [98] [1400/6250] eta: 0:10:23 lr: 0.000000 grad: 0.1516 (0.1690) loss: 0.8085 (0.8058) time: 0.1242 data: 0.0501 max mem: 9377 +Train: [98] [1500/6250] eta: 0:10:06 lr: 0.000000 grad: 0.1629 (0.1685) loss: 0.8054 (0.8058) time: 0.1249 data: 0.0454 max mem: 9377 +Train: [98] [1600/6250] eta: 0:09:48 lr: 0.000000 grad: 0.1517 (0.1683) loss: 0.8001 (0.8058) time: 0.1010 data: 0.0259 max mem: 9377 +Train: [98] [1700/6250] eta: 0:09:32 lr: 0.000000 grad: 0.1519 (0.1680) loss: 0.8062 (0.8057) time: 0.0825 data: 0.0028 max mem: 9377 +Train: [98] [1800/6250] eta: 0:09:17 lr: 0.000000 grad: 0.1563 (0.1677) loss: 0.8020 (0.8057) time: 0.1156 data: 0.0421 max mem: 9377 +Train: [98] [1900/6250] eta: 0:09:02 lr: 0.000000 grad: 0.1569 (0.1672) loss: 0.8079 (0.8060) time: 0.1370 data: 0.0613 max mem: 9377 +Train: [98] [2000/6250] eta: 0:08:47 lr: 0.000000 grad: 0.1585 (0.1669) loss: 0.8052 (0.8060) time: 0.1212 data: 0.0480 max mem: 9377 +Train: [98] [2100/6250] eta: 0:08:33 lr: 0.000000 grad: 0.1642 (0.1668) loss: 0.8056 (0.8060) time: 0.1214 data: 0.0448 max mem: 9377 +Train: [98] [2200/6250] eta: 0:08:18 lr: 0.000000 grad: 0.1449 (0.1665) loss: 0.8076 (0.8061) time: 0.1034 data: 0.0279 max mem: 9377 +Train: [98] [2300/6250] eta: 0:08:06 lr: 0.000000 grad: 0.1441 (0.1662) loss: 0.8147 (0.8062) time: 0.1211 data: 0.0414 max mem: 9377 +Train: [98] [2400/6250] eta: 0:07:53 lr: 0.000000 grad: 0.1477 (0.1659) loss: 0.8133 (0.8062) time: 0.1181 data: 0.0423 max mem: 9377 +Train: [98] [2500/6250] eta: 0:07:40 lr: 0.000000 grad: 0.1566 (0.1657) loss: 0.8081 (0.8063) time: 0.1222 data: 0.0484 max mem: 9377 +Train: [98] [2600/6250] eta: 0:07:28 lr: 0.000000 grad: 0.1514 (0.1654) loss: 0.8137 (0.8065) time: 0.1213 data: 0.0462 max mem: 9377 +Train: [98] [2700/6250] eta: 0:07:15 lr: 0.000000 grad: 0.1471 (0.1651) loss: 0.8121 (0.8066) time: 0.1036 data: 0.0223 max mem: 9377 +Train: [98] [2800/6250] eta: 0:07:02 lr: 0.000000 grad: 0.1621 (0.1648) loss: 0.8065 (0.8067) time: 0.1062 data: 0.0294 max mem: 9377 +Train: [98] [2900/6250] eta: 0:06:49 lr: 0.000000 grad: 0.1518 (0.1646) loss: 0.8119 (0.8069) time: 0.1145 data: 0.0408 max mem: 9377 +Train: [98] [3000/6250] eta: 0:06:36 lr: 0.000000 grad: 0.1548 (0.1643) loss: 0.8124 (0.8071) time: 0.1337 data: 0.0531 max mem: 9377 +Train: [98] [3100/6250] eta: 0:06:24 lr: 0.000000 grad: 0.1475 (0.1642) loss: 0.8107 (0.8072) time: 0.1226 data: 0.0465 max mem: 9377 +Train: [98] [3200/6250] eta: 0:06:11 lr: 0.000000 grad: 0.1606 (0.1640) loss: 0.8050 (0.8073) time: 0.1004 data: 0.0219 max mem: 9377 +Train: [98] [3300/6250] eta: 0:05:59 lr: 0.000000 grad: 0.1490 (0.1638) loss: 0.8117 (0.8074) time: 0.1254 data: 0.0484 max mem: 9377 +Train: [98] [3400/6250] eta: 0:05:46 lr: 0.000000 grad: 0.1566 (0.1638) loss: 0.8104 (0.8075) time: 0.1044 data: 0.0318 max mem: 9377 +Train: [98] [3500/6250] eta: 0:05:34 lr: 0.000000 grad: 0.1485 (0.1636) loss: 0.8061 (0.8076) time: 0.1041 data: 0.0232 max mem: 9377 +Train: [98] [3600/6250] eta: 0:05:21 lr: 0.000000 grad: 0.1652 (0.1637) loss: 0.8076 (0.8076) time: 0.1091 data: 0.0317 max mem: 9377 +Train: [98] [3700/6250] eta: 0:05:09 lr: 0.000000 grad: 0.1657 (0.1636) loss: 0.8089 (0.8076) time: 0.0880 data: 0.0108 max mem: 9377 +Train: [98] [3800/6250] eta: 0:04:56 lr: 0.000000 grad: 0.1622 (0.1635) loss: 0.8155 (0.8077) time: 0.1212 data: 0.0422 max mem: 9377 +Train: [98] [3900/6250] eta: 0:04:43 lr: 0.000000 grad: 0.1464 (0.1634) loss: 0.8193 (0.8079) time: 0.1140 data: 0.0366 max mem: 9377 +Train: [98] [4000/6250] eta: 0:04:31 lr: 0.000000 grad: 0.1538 (0.1631) loss: 0.8171 (0.8080) time: 0.1142 data: 0.0352 max mem: 9377 +Train: [98] [4100/6250] eta: 0:04:18 lr: 0.000000 grad: 0.1511 (0.1628) loss: 0.8059 (0.8081) time: 0.1051 data: 0.0317 max mem: 9377 +Train: [98] [4200/6250] eta: 0:04:06 lr: 0.000000 grad: 0.1580 (0.1627) loss: 0.8072 (0.8082) time: 0.1087 data: 0.0299 max mem: 9377 +Train: [98] [4300/6250] eta: 0:03:54 lr: 0.000000 grad: 0.1479 (0.1625) loss: 0.8096 (0.8083) time: 0.0980 data: 0.0199 max mem: 9377 +Train: [98] [4400/6250] eta: 0:03:42 lr: 0.000000 grad: 0.1490 (0.1622) loss: 0.8196 (0.8084) time: 0.1176 data: 0.0415 max mem: 9377 +Train: [98] [4500/6250] eta: 0:03:29 lr: 0.000000 grad: 0.1482 (0.1620) loss: 0.8151 (0.8085) time: 0.1072 data: 0.0320 max mem: 9377 +Train: [98] [4600/6250] eta: 0:03:17 lr: 0.000000 grad: 0.1504 (0.1618) loss: 0.8085 (0.8086) time: 0.1158 data: 0.0412 max mem: 9377 +Train: [98] [4700/6250] eta: 0:03:05 lr: 0.000000 grad: 0.1543 (0.1616) loss: 0.8190 (0.8087) time: 0.1308 data: 0.0555 max mem: 9377 +Train: [98] [4800/6250] eta: 0:02:53 lr: 0.000000 grad: 0.1464 (0.1615) loss: 0.8146 (0.8088) time: 0.1188 data: 0.0407 max mem: 9377 +Train: [98] [4900/6250] eta: 0:02:41 lr: 0.000000 grad: 0.1539 (0.1614) loss: 0.8083 (0.8089) time: 0.1325 data: 0.0582 max mem: 9377 +Train: [98] [5000/6250] eta: 0:02:29 lr: 0.000000 grad: 0.1523 (0.1613) loss: 0.8178 (0.8090) time: 0.1151 data: 0.0377 max mem: 9377 +Train: [98] [5100/6250] eta: 0:02:17 lr: 0.000000 grad: 0.1514 (0.1611) loss: 0.8032 (0.8091) time: 0.1152 data: 0.0387 max mem: 9377 +Train: [98] [5200/6250] eta: 0:02:05 lr: 0.000000 grad: 0.1429 (0.1610) loss: 0.8153 (0.8092) time: 0.1179 data: 0.0396 max mem: 9377 +Train: [98] [5300/6250] eta: 0:01:53 lr: 0.000000 grad: 0.1492 (0.1609) loss: 0.8104 (0.8093) time: 0.0919 data: 0.0075 max mem: 9377 +Train: [98] [5400/6250] eta: 0:01:41 lr: 0.000000 grad: 0.1513 (0.1608) loss: 0.8141 (0.8093) time: 0.1247 data: 0.0425 max mem: 9377 +Train: [98] [5500/6250] eta: 0:01:29 lr: 0.000000 grad: 0.1493 (0.1606) loss: 0.8142 (0.8094) time: 0.0988 data: 0.0171 max mem: 9377 +Train: [98] [5600/6250] eta: 0:01:17 lr: 0.000000 grad: 0.1590 (0.1606) loss: 0.8119 (0.8095) time: 0.1163 data: 0.0389 max mem: 9377 +Train: [98] [5700/6250] eta: 0:01:05 lr: 0.000000 grad: 0.1427 (0.1605) loss: 0.8178 (0.8096) time: 0.1214 data: 0.0463 max mem: 9377 +Train: [98] [5800/6250] eta: 0:00:53 lr: 0.000000 grad: 0.1623 (0.1605) loss: 0.8124 (0.8096) time: 0.1144 data: 0.0375 max mem: 9377 +Train: [98] [5900/6250] eta: 0:00:41 lr: 0.000000 grad: 0.1639 (0.1606) loss: 0.8112 (0.8096) time: 0.1199 data: 0.0446 max mem: 9377 +Train: [98] [6000/6250] eta: 0:00:29 lr: 0.000000 grad: 0.1503 (0.1606) loss: 0.8085 (0.8097) time: 0.1041 data: 0.0254 max mem: 9377 +Train: [98] [6100/6250] eta: 0:00:17 lr: 0.000000 grad: 0.1539 (0.1605) loss: 0.8150 (0.8097) time: 0.1149 data: 0.0386 max mem: 9377 +Train: [98] [6200/6250] eta: 0:00:05 lr: 0.000000 grad: 0.1617 (0.1606) loss: 0.8088 (0.8098) time: 0.1275 data: 0.0557 max mem: 9377 +Train: [98] [6249/6250] eta: 0:00:00 lr: 0.000000 grad: 0.1647 (0.1606) loss: 0.8130 (0.8098) time: 0.1190 data: 0.0440 max mem: 9377 +Train: [98] Total time: 0:12:30 (0.1201 s / it) +Averaged stats: lr: 0.000000 grad: 0.1647 (0.1606) loss: 0.8130 (0.8098) +Eval (hcp-train-subset): [98] [ 0/62] eta: 0:05:35 loss: 0.8159 (0.8159) time: 5.4159 data: 5.3851 max mem: 9377 +Eval (hcp-train-subset): [98] [61/62] eta: 0:00:00 loss: 0.8063 (0.8066) time: 0.1202 data: 0.0955 max mem: 9377 +Eval (hcp-train-subset): [98] Total time: 0:00:14 (0.2282 s / it) +Averaged stats (hcp-train-subset): loss: 0.8063 (0.8066) +Eval (hcp-val): [98] [ 0/62] eta: 0:05:23 loss: 0.8256 (0.8256) time: 5.2134 data: 5.1851 max mem: 9377 +Eval (hcp-val): [98] [61/62] eta: 0:00:00 loss: 0.8270 (0.8285) time: 0.1001 data: 0.0740 max mem: 9377 +Eval (hcp-val): [98] Total time: 0:00:12 (0.2018 s / it) +Averaged stats (hcp-val): loss: 0.8270 (0.8285) +Eval (nsd-val): [98] [ 0/62] eta: 0:05:07 loss: 0.8044 (0.8044) time: 4.9624 data: 4.9341 max mem: 9377 +Eval (nsd-val): [98] [61/62] eta: 0:00:00 loss: 0.8119 (0.8118) time: 0.1285 data: 0.1040 max mem: 9377 +Eval (nsd-val): [98] Total time: 0:00:12 (0.1991 s / it) +Averaged stats (nsd-val): loss: 0.8119 (0.8118) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +Train: [99] [ 0/6250] eta: 7:33:55 lr: 0.000000 grad: 0.1603 (0.1603) loss: 0.8328 (0.8328) time: 4.3576 data: 4.0648 max mem: 9377 +Train: [99] [ 100/6250] eta: 0:18:41 lr: 0.000000 grad: 0.1608 (0.1755) loss: 0.8161 (0.8204) time: 0.1292 data: 0.0401 max mem: 9377 +Train: [99] [ 200/6250] eta: 0:16:13 lr: 0.000000 grad: 0.1645 (0.1747) loss: 0.8241 (0.8180) time: 0.1455 data: 0.0585 max mem: 9377 +Train: [99] [ 300/6250] eta: 0:15:13 lr: 0.000000 grad: 0.1594 (0.1742) loss: 0.8106 (0.8167) time: 0.1417 data: 0.0567 max mem: 9377 +Train: [99] [ 400/6250] eta: 0:14:22 lr: 0.000000 grad: 0.1630 (0.1726) loss: 0.8052 (0.8153) time: 0.1257 data: 0.0253 max mem: 9377 +Train: [99] [ 500/6250] eta: 0:13:45 lr: 0.000000 grad: 0.1657 (0.1715) loss: 0.8054 (0.8144) time: 0.1323 data: 0.0476 max mem: 9377 +Train: [99] [ 600/6250] eta: 0:13:12 lr: 0.000000 grad: 0.1710 (0.1726) loss: 0.8143 (0.8140) time: 0.1387 data: 0.0544 max mem: 9377 +Train: [99] [ 700/6250] eta: 0:12:42 lr: 0.000000 grad: 0.1655 (0.1749) loss: 0.8033 (0.8139) time: 0.1304 data: 0.0410 max mem: 9377 +Train: [99] [ 800/6250] eta: 0:12:21 lr: 0.000000 grad: 0.1447 (0.1756) loss: 0.8088 (0.8136) time: 0.1317 data: 0.0495 max mem: 9377 +Train: [99] [ 900/6250] eta: 0:11:57 lr: 0.000000 grad: 0.1597 (0.1756) loss: 0.8073 (0.8132) time: 0.1162 data: 0.0337 max mem: 9377 +Train: [99] [1000/6250] eta: 0:11:35 lr: 0.000000 grad: 0.1778 (0.1755) loss: 0.8074 (0.8130) time: 0.1344 data: 0.0574 max mem: 9377 +Train: [99] [1100/6250] eta: 0:11:16 lr: 0.000000 grad: 0.1499 (0.1751) loss: 0.8123 (0.8129) time: 0.1223 data: 0.0371 max mem: 9377 +Train: [99] [1200/6250] eta: 0:10:58 lr: 0.000000 grad: 0.1672 (0.1740) loss: 0.8119 (0.8131) time: 0.1214 data: 0.0460 max mem: 9377 +Train: [99] [1300/6250] eta: 0:10:39 lr: 0.000000 grad: 0.1587 (0.1733) loss: 0.8119 (0.8129) time: 0.1226 data: 0.0516 max mem: 9377 +Train: [99] [1400/6250] eta: 0:10:23 lr: 0.000000 grad: 0.1634 (0.1729) loss: 0.8053 (0.8128) time: 0.1208 data: 0.0461 max mem: 9377 +Train: [99] [1500/6250] eta: 0:10:08 lr: 0.000000 grad: 0.1690 (0.1725) loss: 0.8149 (0.8128) time: 0.1196 data: 0.0433 max mem: 9377 +Train: [99] [1600/6250] eta: 0:09:54 lr: 0.000000 grad: 0.1478 (0.1716) loss: 0.8105 (0.8129) time: 0.1248 data: 0.0509 max mem: 9377 +Train: [99] [1700/6250] eta: 0:09:39 lr: 0.000000 grad: 0.1554 (0.1709) loss: 0.8114 (0.8129) time: 0.1254 data: 0.0483 max mem: 9377 +Train: [99] [1800/6250] eta: 0:09:26 lr: 0.000000 grad: 0.1643 (0.1704) loss: 0.8139 (0.8129) time: 0.1409 data: 0.0672 max mem: 9377 +Train: [99] [1900/6250] eta: 0:09:12 lr: 0.000000 grad: 0.1590 (0.1699) loss: 0.8094 (0.8127) time: 0.1186 data: 0.0469 max mem: 9377 +Train: [99] [2000/6250] eta: 0:08:58 lr: 0.000000 grad: 0.1421 (0.1692) loss: 0.8159 (0.8127) time: 0.1257 data: 0.0507 max mem: 9377 +Train: [99] [2100/6250] eta: 0:08:44 lr: 0.000000 grad: 0.1507 (0.1685) loss: 0.8096 (0.8127) time: 0.1254 data: 0.0497 max mem: 9377 +Train: [99] [2200/6250] eta: 0:08:30 lr: 0.000000 grad: 0.1432 (0.1681) loss: 0.8137 (0.8127) time: 0.1135 data: 0.0365 max mem: 9377 +Train: [99] [2300/6250] eta: 0:08:17 lr: 0.000000 grad: 0.1449 (0.1674) loss: 0.8155 (0.8127) time: 0.1301 data: 0.0566 max mem: 9377 +Train: [99] [2400/6250] eta: 0:08:03 lr: 0.000000 grad: 0.1528 (0.1670) loss: 0.8024 (0.8125) time: 0.1173 data: 0.0469 max mem: 9377 +Train: [99] [2500/6250] eta: 0:07:50 lr: 0.000000 grad: 0.1473 (0.1665) loss: 0.8145 (0.8125) time: 0.1103 data: 0.0338 max mem: 9377 +Train: [99] [2600/6250] eta: 0:07:44 lr: 0.000000 grad: 0.1549 (0.1663) loss: 0.8083 (0.8124) time: 0.2273 data: 0.0954 max mem: 9377 +Train: [99] [2700/6250] eta: 0:07:42 lr: 0.000000 grad: 0.1534 (0.1659) loss: 0.8147 (0.8122) time: 0.1914 data: 0.0706 max mem: 9377 +Train: [99] [2800/6250] eta: 0:07:47 lr: 0.000000 grad: 0.1573 (0.1657) loss: 0.8084 (0.8121) time: 0.4939 data: 0.2538 max mem: 9377 +Train: [99] [2900/6250] eta: 0:07:36 lr: 0.000000 grad: 0.1511 (0.1653) loss: 0.8098 (0.8120) time: 0.1064 data: 0.0252 max mem: 9377 +Train: [99] [3000/6250] eta: 0:07:20 lr: 0.000000 grad: 0.1570 (0.1651) loss: 0.8061 (0.8119) time: 0.1420 data: 0.0687 max mem: 9377 +Train: [99] [3100/6250] eta: 0:07:04 lr: 0.000000 grad: 0.1497 (0.1649) loss: 0.8113 (0.8117) time: 0.1207 data: 0.0434 max mem: 9377 +Train: [99] [3200/6250] eta: 0:06:49 lr: 0.000000 grad: 0.1579 (0.1647) loss: 0.8058 (0.8116) time: 0.1043 data: 0.0230 max mem: 9377 +Train: [99] [3300/6250] eta: 0:06:40 lr: 0.000000 grad: 0.1498 (0.1645) loss: 0.8042 (0.8116) time: 0.2061 data: 0.0463 max mem: 9377 +Train: [99] [3400/6250] eta: 0:06:34 lr: 0.000000 grad: 0.1548 (0.1646) loss: 0.8053 (0.8115) time: 0.4726 data: 0.3237 max mem: 9377 +Train: [99] [3500/6250] eta: 0:06:21 lr: 0.000000 grad: 0.1492 (0.1643) loss: 0.8083 (0.8116) time: 0.0944 data: 0.0002 max mem: 9377 +Train: [99] [3600/6250] eta: 0:06:07 lr: 0.000000 grad: 0.1618 (0.1641) loss: 0.8105 (0.8116) time: 0.1254 data: 0.0484 max mem: 9377 +Train: [99] [3700/6250] eta: 0:05:52 lr: 0.000000 grad: 0.1497 (0.1639) loss: 0.8136 (0.8116) time: 0.1270 data: 0.0390 max mem: 9377 +Train: [99] [3800/6250] eta: 0:05:37 lr: 0.000000 grad: 0.1511 (0.1637) loss: 0.8211 (0.8117) time: 0.1312 data: 0.0520 max mem: 9377 +Train: [99] [3900/6250] eta: 0:05:22 lr: 0.000000 grad: 0.1564 (0.1636) loss: 0.8086 (0.8118) time: 0.1158 data: 0.0401 max mem: 9377 +Train: [99] [4000/6250] eta: 0:05:08 lr: 0.000000 grad: 0.1547 (0.1634) loss: 0.8159 (0.8119) time: 0.1274 data: 0.0522 max mem: 9377 +Train: [99] [4100/6250] eta: 0:04:54 lr: 0.000000 grad: 0.1506 (0.1633) loss: 0.8170 (0.8119) time: 0.1162 data: 0.0299 max mem: 9377 +Train: [99] [4200/6250] eta: 0:04:39 lr: 0.000000 grad: 0.1505 (0.1631) loss: 0.8159 (0.8120) time: 0.1127 data: 0.0315 max mem: 9377 +Train: [99] [4300/6250] eta: 0:04:25 lr: 0.000000 grad: 0.1592 (0.1630) loss: 0.8120 (0.8121) time: 0.1258 data: 0.0483 max mem: 9377 +Train: [99] [4400/6250] eta: 0:04:11 lr: 0.000000 grad: 0.1451 (0.1628) loss: 0.8168 (0.8122) time: 0.1341 data: 0.0565 max mem: 9377 +Train: [99] [4500/6250] eta: 0:03:57 lr: 0.000000 grad: 0.1494 (0.1626) loss: 0.8138 (0.8122) time: 0.1364 data: 0.0620 max mem: 9377 +Train: [99] [4600/6250] eta: 0:03:43 lr: 0.000000 grad: 0.1615 (0.1623) loss: 0.8096 (0.8123) time: 0.1231 data: 0.0499 max mem: 9377 +Train: [99] [4700/6250] eta: 0:03:29 lr: 0.000000 grad: 0.1649 (0.1621) loss: 0.8124 (0.8123) time: 0.1171 data: 0.0429 max mem: 9377 +Train: [99] [4800/6250] eta: 0:03:15 lr: 0.000000 grad: 0.1471 (0.1619) loss: 0.8124 (0.8123) time: 0.1222 data: 0.0462 max mem: 9377 +Train: [99] [4900/6250] eta: 0:03:01 lr: 0.000000 grad: 0.1414 (0.1617) loss: 0.8146 (0.8124) time: 0.1180 data: 0.0388 max mem: 9377 +Train: [99] [5000/6250] eta: 0:02:47 lr: 0.000000 grad: 0.1548 (0.1616) loss: 0.8197 (0.8124) time: 0.1095 data: 0.0344 max mem: 9377 +Train: [99] [5100/6250] eta: 0:02:33 lr: 0.000000 grad: 0.1627 (0.1615) loss: 0.8119 (0.8124) time: 0.1113 data: 0.0350 max mem: 9377 +Train: [99] [5200/6250] eta: 0:02:20 lr: 0.000000 grad: 0.1584 (0.1614) loss: 0.8067 (0.8124) time: 0.1295 data: 0.0522 max mem: 9377 +Train: [99] [5300/6250] eta: 0:02:06 lr: 0.000000 grad: 0.1577 (0.1613) loss: 0.8044 (0.8124) time: 0.1221 data: 0.0484 max mem: 9377 +Train: [99] [5400/6250] eta: 0:01:53 lr: 0.000000 grad: 0.1447 (0.1612) loss: 0.8105 (0.8124) time: 0.1155 data: 0.0416 max mem: 9377 +Train: [99] [5500/6250] eta: 0:01:39 lr: 0.000000 grad: 0.1537 (0.1612) loss: 0.8103 (0.8123) time: 0.1301 data: 0.0565 max mem: 9377 +Train: [99] [5600/6250] eta: 0:01:26 lr: 0.000000 grad: 0.1543 (0.1610) loss: 0.8108 (0.8123) time: 0.1103 data: 0.0386 max mem: 9377 +Train: [99] [5700/6250] eta: 0:01:12 lr: 0.000000 grad: 0.1492 (0.1610) loss: 0.8141 (0.8123) time: 0.1277 data: 0.0558 max mem: 9377 +Train: [99] [5800/6250] eta: 0:00:59 lr: 0.000000 grad: 0.1421 (0.1609) loss: 0.8148 (0.8123) time: 0.1098 data: 0.0347 max mem: 9377 +Train: [99] [5900/6250] eta: 0:00:46 lr: 0.000000 grad: 0.1547 (0.1610) loss: 0.8158 (0.8123) time: 0.1133 data: 0.0351 max mem: 9377 +Train: [99] [6000/6250] eta: 0:00:32 lr: 0.000000 grad: 0.1455 (0.1609) loss: 0.8119 (0.8123) time: 0.1266 data: 0.0517 max mem: 9377 +Train: [99] [6100/6250] eta: 0:00:19 lr: 0.000000 grad: 0.1667 (0.1608) loss: 0.8033 (0.8122) time: 0.1122 data: 0.0373 max mem: 9377 +Train: [99] [6200/6250] eta: 0:00:06 lr: 0.000000 grad: 0.1515 (0.1608) loss: 0.8127 (0.8122) time: 0.1333 data: 0.0575 max mem: 9377 +Train: [99] [6249/6250] eta: 0:00:00 lr: 0.000000 grad: 0.1609 (0.1609) loss: 0.8085 (0.8122) time: 0.1247 data: 0.0510 max mem: 9377 +Train: [99] Total time: 0:13:47 (0.1323 s / it) +Averaged stats: lr: 0.000000 grad: 0.1609 (0.1609) loss: 0.8085 (0.8122) +Eval (hcp-train-subset): [99] [ 0/62] eta: 0:04:34 loss: 0.8153 (0.8153) time: 4.4279 data: 4.3999 max mem: 9377 +Eval (hcp-train-subset): [99] [61/62] eta: 0:00:00 loss: 0.8050 (0.8062) time: 0.1146 data: 0.0902 max mem: 9377 +Eval (hcp-train-subset): [99] Total time: 0:00:11 (0.1910 s / it) +Averaged stats (hcp-train-subset): loss: 0.8050 (0.8062) +Making plots (hcp-train-subset): example=30 +Eval (hcp-val): [99] [ 0/62] eta: 0:05:28 loss: 0.8251 (0.8251) time: 5.3050 data: 5.2718 max mem: 9377 +Eval (hcp-val): [99] [61/62] eta: 0:00:00 loss: 0.8265 (0.8284) time: 0.1150 data: 0.0905 max mem: 9377 +Eval (hcp-val): [99] Total time: 0:00:12 (0.1997 s / it) +Averaged stats (hcp-val): loss: 0.8265 (0.8284) +Making plots (hcp-val): example=3 +Eval (nsd-val): [99] [ 0/62] eta: 0:05:07 loss: 0.8027 (0.8027) time: 4.9647 data: 4.9359 max mem: 9377 +Eval (nsd-val): [99] [61/62] eta: 0:00:00 loss: 0.8117 (0.8117) time: 0.0952 data: 0.0707 max mem: 9377 +Eval (nsd-val): [99] Total time: 0:00:11 (0.1886 s / it) +Averaged stats (nsd-val): loss: 0.8117 (0.8117) +Making plots (nsd-val): example=35 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_1/pretrain/checkpoint-00099.pth +done! training time: 1 day, 3:23:02 diff --git a/data_scaling/n1600_2/eval_v2/aabc_age__patch__logistic/config.yaml b/data_scaling/n1600_2/eval_v2/aabc_age__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e59f6fdb4e4c569ce773f01d46fde75c57237963 --- /dev/null +++ b/data_scaling/n1600_2/eval_v2/aabc_age__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n1600_2; eval v2 (aabc_age patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n1600_2/eval_v2/aabc_age__patch__logistic +model: flat_mae +representation: patch +dataset: aabc_age +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/aabc_age__patch__logistic +remote_dir: null diff --git a/data_scaling/n1600_2/eval_v2/aabc_age__patch__logistic/eval_table.csv b/data_scaling/n1600_2/eval_v2/aabc_age__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..2cc54943231a9c1ee560b69d1b07df7b2babcd54 --- /dev/null +++ b/data_scaling/n1600_2/eval_v2/aabc_age__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,aabc_age,,0.3593813663804626,train,0.9862204724409449,0.005111042970423982,0.9861064710581741,0.005175152857325838,0.9860511335575709,0.005195479872449534 +flat_mae,patch,logistic,aabc_age,,0.3593813663804626,test,0.36538461538461536,0.06463840020502587,0.3578925521821632,0.06614268844830547,0.3592032967032967,0.06405533041331303 +flat_mae,patch,logistic,aabc_age,1,0.046415888336127774,train,0.8385826771653543,0.016139106277656867,0.8380041581610076,0.016297313725363513,0.8393197065914317,0.0161011317172447 +flat_mae,patch,logistic,aabc_age,1,0.046415888336127774,test,0.40384615384615385,0.05923919770201736,0.3821428571428571,0.05569598720201474,0.3999542124542125,0.05842530455042719 +flat_mae,patch,logistic,aabc_age,2,0.000774263682681127,train,0.5688976377952756,0.02173288969826583,0.5646851729592575,0.02228787824213132,0.5687430594047822,0.021809271125977397 +flat_mae,patch,logistic,aabc_age,2,0.000774263682681127,test,0.5576923076923077,0.06730465927232165,0.5487319832147419,0.06830475126908966,0.5544871794871795,0.06701709548305267 +flat_mae,patch,logistic,aabc_age,3,0.046415888336127774,train,0.8208661417322834,0.016790034657811974,0.8215740344113658,0.016722052571075972,0.8219948301556036,0.016744101170045503 +flat_mae,patch,logistic,aabc_age,3,0.046415888336127774,test,0.5192307692307693,0.06840614656281967,0.5163773008600595,0.06984540957685567,0.5190018315018314,0.06849659545754062 +flat_mae,patch,logistic,aabc_age,4,0.005994842503189409,train,0.6673228346456693,0.020713534949425228,0.6657567991442914,0.02111876630537634,0.6670857795235419,0.020775385008835717 +flat_mae,patch,logistic,aabc_age,4,0.005994842503189409,test,0.5576923076923077,0.06562246332093678,0.5523809523809524,0.06700780543533107,0.5604395604395604,0.065973480568854 +flat_mae,patch,logistic,aabc_age,5,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,5,166.81005372000556,test,0.46153846153846156,0.06046780159378873,0.45200632281482345,0.062483684613445604,0.459478021978022,0.06053303773479253 +flat_mae,patch,logistic,aabc_age,6,0.005994842503189409,train,0.65748031496063,0.019131482954211375,0.6553259952733899,0.019462722001721906,0.6577254458763035,0.019092483181511646 +flat_mae,patch,logistic,aabc_age,6,0.005994842503189409,test,0.5769230769230769,0.06970541352403135,0.5792420482743064,0.07097636604623843,0.5755494505494505,0.06982749284434343 +flat_mae,patch,logistic,aabc_age,7,0.000774263682681127,train,0.5767716535433071,0.020705860413547435,0.5712428751898696,0.02090433459792816,0.5761048852200564,0.0207063041478836 +flat_mae,patch,logistic,aabc_age,7,0.000774263682681127,test,0.4807692307692308,0.060108518727952684,0.44232843137254907,0.058497407050775795,0.47275641025641024,0.05909405429950046 +flat_mae,patch,logistic,aabc_age,8,0.005994842503189409,train,0.6771653543307087,0.02047276498835007,0.674310403105562,0.02079572645098535,0.6764137477008529,0.020445304632087165 +flat_mae,patch,logistic,aabc_age,8,0.005994842503189409,test,0.5,0.06379139217775648,0.5002289377289377,0.06342076763413294,0.5086996336996337,0.06436634498093523 +flat_mae,patch,logistic,aabc_age,9,0.046415888336127774,train,0.8346456692913385,0.01565438890917766,0.8336689016551717,0.01595094539307644,0.8351374885162517,0.015658495855010245 +flat_mae,patch,logistic,aabc_age,9,0.046415888336127774,test,0.4807692307692308,0.06772665212664822,0.4751602564102564,0.0690917504977582,0.4773351648351648,0.06759717781794651 +flat_mae,patch,logistic,aabc_age,10,0.005994842503189409,train,0.6712598425196851,0.01958498192858585,0.6696807441818564,0.019920752591426436,0.6720882824532163,0.019578594455878236 +flat_mae,patch,logistic,aabc_age,10,0.005994842503189409,test,0.46153846153846156,0.06483364009196126,0.4505622091828988,0.0645964340258125,0.459478021978022,0.06479925259651881 +flat_mae,patch,logistic,aabc_age,11,0.005994842503189409,train,0.6377952755905512,0.021130525336541018,0.6355025354573981,0.021458929815514366,0.638334453737996,0.02114706076590936 +flat_mae,patch,logistic,aabc_age,11,0.005994842503189409,test,0.5961538461538461,0.06526572902079149,0.5987192118226601,0.06653110153693226,0.5961538461538461,0.06525177740239879 +flat_mae,patch,logistic,aabc_age,12,0.005994842503189409,train,0.6771653543307087,0.020690726043623705,0.6763979007633588,0.02092925399156013,0.6780690616794753,0.02068590566207705 +flat_mae,patch,logistic,aabc_age,12,0.005994842503189409,test,0.4423076923076923,0.06344153530902971,0.4365231259968102,0.06446776624753814,0.44207875457875456,0.06348665597918453 +flat_mae,patch,logistic,aabc_age,13,0.005994842503189409,train,0.6633858267716536,0.02016963331665245,0.6613262269043796,0.020512049839523358,0.6638738063135202,0.020172235355092445 +flat_mae,patch,logistic,aabc_age,13,0.005994842503189409,test,0.4807692307692308,0.06617378948043691,0.47477703455964326,0.06663504550759104,0.4832875457875458,0.0667194637367675 +flat_mae,patch,logistic,aabc_age,14,0.046415888336127774,train,0.8405511811023622,0.015679973014106497,0.8407927873144576,0.01573903741011507,0.8410006732017745,0.015644784087457727 +flat_mae,patch,logistic,aabc_age,14,0.046415888336127774,test,0.5384615384615384,0.06245020501574734,0.5211675579322638,0.06797898800677389,0.5425824175824177,0.0630168816461007 +flat_mae,patch,logistic,aabc_age,15,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,15,21.54434690031882,test,0.4423076923076923,0.06746680095253688,0.44543956043956046,0.06700534931716601,0.44184981684981683,0.06734593412883222 +flat_mae,patch,logistic,aabc_age,16,0.005994842503189409,train,0.6535433070866141,0.020108028661846335,0.6529623520495732,0.02025473197881326,0.6538431478224511,0.020084134080010113 +flat_mae,patch,logistic,aabc_age,16,0.005994842503189409,test,0.5192307692307693,0.06401131325673666,0.5162711154544186,0.06528514077777343,0.5173992673992673,0.06410622270269783 +flat_mae,patch,logistic,aabc_age,17,0.000774263682681127,train,0.5787401574803149,0.021508690877121428,0.5734999832821788,0.022161325698907997,0.5779034463711357,0.021501576400583394 +flat_mae,patch,logistic,aabc_age,17,0.000774263682681127,test,0.40384615384615385,0.059532933056781236,0.37399436090225563,0.057598321019142494,0.3999542124542125,0.05853471994664114 +flat_mae,patch,logistic,aabc_age,18,0.005994842503189409,train,0.6692913385826772,0.020411658709463633,0.6683322182275715,0.020513484133645066,0.669804598869558,0.020339138287784955 +flat_mae,patch,logistic,aabc_age,18,0.005994842503189409,test,0.5192307692307693,0.0650367556524679,0.5012971562894164,0.06522984408882443,0.5114468864468865,0.06454451991620425 +flat_mae,patch,logistic,aabc_age,19,0.005994842503189409,train,0.6692913385826772,0.02164292366646174,0.6670508148194988,0.022018667350288117,0.6712099794970743,0.021596423053472983 +flat_mae,patch,logistic,aabc_age,19,0.005994842503189409,test,0.46153846153846156,0.06824870764105762,0.4642567110309046,0.06867922738477522,0.46130952380952384,0.06859915202684563 +flat_mae,patch,logistic,aabc_age,20,0.3593813663804626,train,0.9803149606299213,0.006204734401846548,0.9804735449959116,0.006159189490755112,0.9806090078616925,0.006118697592531963 +flat_mae,patch,logistic,aabc_age,20,0.3593813663804626,test,0.4807692307692308,0.05485185804839201,0.4559721887308094,0.05222298327283031,0.47687728937728935,0.05403671129456438 +flat_mae,patch,logistic,aabc_age,21,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,21,2.782559402207126,test,0.40384615384615385,0.061978761393765576,0.3850182411674347,0.06366581369849365,0.39880952380952384,0.061574328249956914 +flat_mae,patch,logistic,aabc_age,22,0.046415888336127774,train,0.8188976377952756,0.016855128483364106,0.8183032284039427,0.017020331946970862,0.8194435920205452,0.01685228025087765 +flat_mae,patch,logistic,aabc_age,22,0.046415888336127774,test,0.46153846153846156,0.06876203498749366,0.4741048593350384,0.06795864807600198,0.46291208791208793,0.06892648119723843 +flat_mae,patch,logistic,aabc_age,23,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,23,2.782559402207126,test,0.5,0.056795282744360326,0.4910714285714286,0.05895481597414775,0.49954212454212454,0.0570237074833746 +flat_mae,patch,logistic,aabc_age,24,0.005994842503189409,train,0.6633858267716536,0.021773077369398118,0.661925764209271,0.021911412146050763,0.6636562384323413,0.021736816136744185 +flat_mae,patch,logistic,aabc_age,24,0.005994842503189409,test,0.38461538461538464,0.06594335923517193,0.3864994425863991,0.06511280282532958,0.3839285714285714,0.06589713106690832 +flat_mae,patch,logistic,aabc_age,25,0.005994842503189409,train,0.6692913385826772,0.020896642023045132,0.6681909171458014,0.021019220753401607,0.6705572758535374,0.020789080112978715 +flat_mae,patch,logistic,aabc_age,25,0.005994842503189409,test,0.3076923076923077,0.059864294264700475,0.30356002554278416,0.05814421154858443,0.30815018315018317,0.05998259139173375 +flat_mae,patch,logistic,aabc_age,26,0.046415888336127774,train,0.8346456692913385,0.016125628129526164,0.8340158221255782,0.01626523976904646,0.8347023527538939,0.01615414071067884 +flat_mae,patch,logistic,aabc_age,26,0.046415888336127774,test,0.46153846153846156,0.06847990246357043,0.46543040293040294,0.06603380182279563,0.46543040293040294,0.06896882177063035 +flat_mae,patch,logistic,aabc_age,27,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,27,2.782559402207126,test,0.3269230769230769,0.058104310913610895,0.3076765188834154,0.05813108157155638,0.326007326007326,0.057818611182935686 +flat_mae,patch,logistic,aabc_age,28,0.005994842503189409,train,0.6830708661417323,0.021526635624084002,0.6829035240540375,0.021712381809859887,0.6843850033276497,0.021454891240156475 +flat_mae,patch,logistic,aabc_age,28,0.005994842503189409,test,0.4807692307692308,0.0670802310405212,0.4807407407407407,0.0665760551078589,0.4789377289377289,0.0670969291722002 +flat_mae,patch,logistic,aabc_age,29,0.046415888336127774,train,0.8385826771653543,0.016623937776911187,0.838672936471599,0.016683562518776313,0.8402223435860748,0.01651394193914067 +flat_mae,patch,logistic,aabc_age,29,0.046415888336127774,test,0.4423076923076923,0.0638910291783397,0.43980519480519475,0.06674539385704154,0.44024725274725274,0.06405740090570425 +flat_mae,patch,logistic,aabc_age,30,0.005994842503189409,train,0.6515748031496063,0.02097176653574952,0.6486281666607834,0.021405519255667596,0.651627072109308,0.021019302974545667 +flat_mae,patch,logistic,aabc_age,30,0.005994842503189409,test,0.5192307692307693,0.06488007826916531,0.5234126984126983,0.06119566832942112,0.5247252747252747,0.06520531567246199 +flat_mae,patch,logistic,aabc_age,31,0.046415888336127774,train,0.8385826771653543,0.0167625127580464,0.8386900128124036,0.016865737238718728,0.8391521253804741,0.016703728335207155 +flat_mae,patch,logistic,aabc_age,31,0.046415888336127774,test,0.5576923076923077,0.06430524240824718,0.54683908045977,0.06573199383453367,0.5544871794871795,0.06408797682664334 +flat_mae,patch,logistic,aabc_age,32,0.046415888336127774,train,0.8110236220472441,0.017127341787086314,0.810860980929892,0.017224262944818255,0.8115966437726918,0.017078315245389632 +flat_mae,patch,logistic,aabc_age,32,0.046415888336127774,test,0.4423076923076923,0.06288991979510167,0.43863346104725415,0.06359884722091162,0.4416208791208791,0.06302106181697256 +flat_mae,patch,logistic,aabc_age,33,0.000774263682681127,train,0.5846456692913385,0.020948254716294423,0.5805268859680625,0.02137193134536715,0.5836666577162157,0.020911601013855103 +flat_mae,patch,logistic,aabc_age,33,0.000774263682681127,test,0.4423076923076923,0.05546280538401469,0.39299242424242425,0.04666623187044457,0.4340659340659341,0.05412728667374261 +flat_mae,patch,logistic,aabc_age,34,0.005994842503189409,train,0.655511811023622,0.020753627717248067,0.6532256860950909,0.020982825181115813,0.6559768713954457,0.0206777331772846 +flat_mae,patch,logistic,aabc_age,34,0.005994842503189409,test,0.4423076923076923,0.06825036575752179,0.45478543195934495,0.06957173577803566,0.44207875457875456,0.06853747495547344 +flat_mae,patch,logistic,aabc_age,35,0.046415888336127774,train,0.8385826771653543,0.01566319471472336,0.8381279838596912,0.015865505834145317,0.838667002947895,0.015670588597229976 +flat_mae,patch,logistic,aabc_age,35,0.046415888336127774,test,0.36538461538461536,0.06389445577896365,0.3663961038961039,0.06209892121098172,0.36744505494505497,0.06427161220333454 +flat_mae,patch,logistic,aabc_age,36,0.046415888336127774,train,0.8188976377952756,0.01701868641439092,0.8181266187985113,0.017104475136800676,0.8188584962475234,0.0170752010338078 +flat_mae,patch,logistic,aabc_age,36,0.046415888336127774,test,0.46153846153846156,0.061182564616218495,0.44024699808904477,0.06299532871827582,0.46222527472527475,0.061261740296321116 +flat_mae,patch,logistic,aabc_age,37,0.005994842503189409,train,0.6535433070866141,0.02022260449119908,0.6520411025302278,0.020533180976517393,0.6547134193471669,0.02027547324125603 +flat_mae,patch,logistic,aabc_age,37,0.005994842503189409,test,0.4807692307692308,0.06588472764820467,0.47777777777777775,0.06814378584143704,0.4789377289377289,0.06594883326150192 +flat_mae,patch,logistic,aabc_age,38,0.000774263682681127,train,0.5492125984251969,0.02065142985275135,0.5404352628222507,0.021070775429099842,0.5492197284561047,0.020724502588480583 +flat_mae,patch,logistic,aabc_age,38,0.000774263682681127,test,0.5384615384615384,0.07081807469948623,0.5329697106012896,0.07317491575572407,0.5338827838827839,0.0708554841087877 +flat_mae,patch,logistic,aabc_age,39,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,39,2.782559402207126,test,0.40384615384615385,0.06865698118246066,0.41166109253065775,0.06784016599348276,0.4033882783882784,0.06882098726636592 +flat_mae,patch,logistic,aabc_age,40,0.046415888336127774,train,0.8248031496062992,0.01680950960376971,0.824756095934538,0.0168063213909881,0.8253743845765829,0.016740191970664002 +flat_mae,patch,logistic,aabc_age,40,0.046415888336127774,test,0.5,0.060999211848628394,0.47914746543778797,0.06413372051028102,0.49793956043956045,0.061073188170595684 +flat_mae,patch,logistic,aabc_age,41,0.046415888336127774,train,0.8346456692913385,0.015990977891384508,0.8347173207886448,0.016014771117791343,0.834902299434779,0.01603616971679048 +flat_mae,patch,logistic,aabc_age,41,0.046415888336127774,test,0.4230769230769231,0.061458734411822695,0.41131784881784883,0.062236455696746816,0.4237637362637362,0.06178382669296002 +flat_mae,patch,logistic,aabc_age,42,0.000774263682681127,train,0.5688976377952756,0.02091488883224459,0.5612078770340387,0.021294221835290672,0.5682255715022757,0.020958414908996602 +flat_mae,patch,logistic,aabc_age,42,0.000774263682681127,test,0.4230769230769231,0.06274390278965843,0.4033994032395567,0.06510499841712931,0.4283424908424909,0.06351567763967293 +flat_mae,patch,logistic,aabc_age,43,0.000774263682681127,train,0.5787401574803149,0.020027609360836696,0.5728615303615303,0.020290414791866482,0.5778534597009144,0.020058342210703434 +flat_mae,patch,logistic,aabc_age,43,0.000774263682681127,test,0.4423076923076923,0.06592383986633284,0.4362824675324676,0.06662216777772453,0.43887362637362637,0.06571131729858465 +flat_mae,patch,logistic,aabc_age,44,0.005994842503189409,train,0.6732283464566929,0.01839659390673542,0.6701226897492358,0.018640985716513576,0.6739544514748106,0.01834964416682747 +flat_mae,patch,logistic,aabc_age,44,0.005994842503189409,test,0.5,0.06773123880499435,0.49181547619047616,0.07042791099656556,0.49404761904761907,0.0677969807819352 +flat_mae,patch,logistic,aabc_age,45,0.046415888336127774,train,0.8051181102362205,0.01731490691895289,0.8039926814837315,0.017696480344007903,0.8062185815197482,0.01730399005794323 +flat_mae,patch,logistic,aabc_age,45,0.046415888336127774,test,0.5384615384615384,0.06436830047864302,0.540994623655914,0.06585721628860068,0.5428113553113553,0.06463631016361242 +flat_mae,patch,logistic,aabc_age,46,0.046415888336127774,train,0.8307086614173228,0.01765604019794674,0.8297596725318512,0.017915119041523934,0.8311052304517357,0.017681640365353867 +flat_mae,patch,logistic,aabc_age,46,0.046415888336127774,test,0.5,0.06718886213701102,0.5046620046620047,0.06782734387856687,0.5059523809523809,0.06713380102702346 +flat_mae,patch,logistic,aabc_age,47,0.000774263682681127,train,0.5767716535433071,0.020187099916481467,0.5723418817622408,0.020335057345272325,0.5761548718902777,0.020126823797176497 +flat_mae,patch,logistic,aabc_age,47,0.000774263682681127,test,0.38461538461538464,0.06327927205799186,0.3704415073569206,0.06346109201885287,0.3823260073260073,0.06276503997570618 +flat_mae,patch,logistic,aabc_age,48,0.005994842503189409,train,0.6673228346456693,0.02148223719664761,0.6657408331704905,0.021869325077675847,0.6673533340749421,0.02150458531175324 +flat_mae,patch,logistic,aabc_age,48,0.005994842503189409,test,0.4807692307692308,0.06408320220411685,0.47827903091060986,0.06565823908249702,0.48031135531135527,0.06411641006928165 +flat_mae,patch,logistic,aabc_age,49,9.999999999999999e-05,train,0.49803149606299213,0.017981645494201833,0.4748676330664184,0.019162293664356463,0.49590061355341214,0.017899119933828603 +flat_mae,patch,logistic,aabc_age,49,9.999999999999999e-05,test,0.4230769230769231,0.05841104854148723,0.401328320802005,0.05882507737576725,0.4164377289377289,0.05764627424432652 +flat_mae,patch,logistic,aabc_age,50,0.005994842503189409,train,0.687007874015748,0.020515195739902462,0.6864757913015167,0.02069854538310309,0.6877145710784079,0.02041388496681423 +flat_mae,patch,logistic,aabc_age,50,0.005994842503189409,test,0.38461538461538464,0.06320222076841266,0.3848007246376812,0.06351056070885817,0.38278388278388276,0.06332834729307044 +flat_mae,patch,logistic,aabc_age,51,0.005994842503189409,train,0.6791338582677166,0.019830972455604307,0.6786413949145241,0.02004853858809992,0.6807878810254915,0.019681333187543105 +flat_mae,patch,logistic,aabc_age,51,0.005994842503189409,test,0.4230769230769231,0.060528686533668684,0.40366653031011374,0.06167128787639694,0.4194139194139194,0.05988501541514865 +flat_mae,patch,logistic,aabc_age,52,0.046415888336127774,train,0.8287401574803149,0.016232508339398877,0.8285760877206403,0.0163642713710123,0.8291890747599201,0.0161476032026903 +flat_mae,patch,logistic,aabc_age,52,0.046415888336127774,test,0.4423076923076923,0.06487670372630328,0.4349655529378168,0.06655303647135749,0.4448260073260073,0.06491580068985833 +flat_mae,patch,logistic,aabc_age,53,9.999999999999999e-05,train,0.49015748031496065,0.0200146669513581,0.47512739971433415,0.020668743700686363,0.4882536119864439,0.019952658219637216 +flat_mae,patch,logistic,aabc_age,53,9.999999999999999e-05,test,0.5384615384615384,0.05868725404193186,0.49271206690561525,0.057744236978098086,0.5290750915750916,0.05770404579989048 +flat_mae,patch,logistic,aabc_age,54,0.005994842503189409,train,0.6673228346456693,0.020009342786498453,0.6653094998136571,0.02024843931193439,0.6685587680215732,0.01998872728899996 +flat_mae,patch,logistic,aabc_age,54,0.005994842503189409,test,0.4807692307692308,0.06300065745876535,0.46779812637105767,0.0648231564668786,0.48031135531135527,0.0629677853988796 +flat_mae,patch,logistic,aabc_age,55,0.005994842503189409,train,0.655511811023622,0.020189702269923542,0.653027301035371,0.02043925606747198,0.6554917489628664,0.020114995148143352 +flat_mae,patch,logistic,aabc_age,55,0.005994842503189409,test,0.5769230769230769,0.06023301646264571,0.5733516483516483,0.06160115460403302,0.5810439560439561,0.06063064244856499 +flat_mae,patch,logistic,aabc_age,56,0.000774263682681127,train,0.594488188976378,0.021931305113109166,0.5904975516776759,0.022196064382850107,0.5937972895477273,0.021899212372184052 +flat_mae,patch,logistic,aabc_age,56,0.000774263682681127,test,0.40384615384615385,0.06090387955113121,0.38267682781614676,0.061091420333888626,0.40293040293040294,0.06088914447362108 +flat_mae,patch,logistic,aabc_age,57,0.046415888336127774,train,0.8326771653543307,0.01675023353988378,0.8317337496250659,0.01705177015755197,0.8327362103918571,0.016798102654782286 +flat_mae,patch,logistic,aabc_age,57,0.046415888336127774,test,0.4807692307692308,0.06540509860153812,0.4822222222222222,0.06342096999747539,0.4816849816849817,0.06554600760403767 +flat_mae,patch,logistic,aabc_age,58,0.000774263682681127,train,0.5787401574803149,0.020991976463071594,0.5733549785934519,0.0215414294526884,0.5774683106087777,0.021021121414489575 +flat_mae,patch,logistic,aabc_age,58,0.000774263682681127,test,0.40384615384615385,0.05445234331328352,0.3639393939393939,0.04753937933158865,0.3983516483516484,0.05317522436410746 +flat_mae,patch,logistic,aabc_age,59,0.005994842503189409,train,0.6850393700787402,0.020272561504494187,0.6836390295232291,0.020519882216966705,0.6856984420461498,0.02018551137419996 +flat_mae,patch,logistic,aabc_age,59,0.005994842503189409,test,0.5192307692307693,0.06723450966535584,0.5182919254658385,0.06808638282870305,0.5222069597069597,0.06712089745563525 +flat_mae,patch,logistic,aabc_age,60,0.005994842503189409,train,0.6712598425196851,0.020474244317907335,0.6668425436742265,0.02087423406815924,0.6710856721181306,0.02047434959996285 +flat_mae,patch,logistic,aabc_age,60,0.005994842503189409,test,0.38461538461538464,0.0596746107787695,0.3719087425983978,0.06217334274404628,0.3823260073260073,0.05933683029543273 +flat_mae,patch,logistic,aabc_age,61,0.046415888336127774,train,0.8228346456692913,0.01659620627939185,0.8227512214046587,0.016746688362350205,0.824546068290662,0.01654269146885166 +flat_mae,patch,logistic,aabc_age,61,0.046415888336127774,test,0.4230769230769231,0.06489892004097762,0.43480519480519486,0.06462193816307538,0.42422161172161177,0.06494660844909432 +flat_mae,patch,logistic,aabc_age,62,0.046415888336127774,train,0.812992125984252,0.015697882256757258,0.8127215775568638,0.015842750845714564,0.8145830176701081,0.015674031956065284 +flat_mae,patch,logistic,aabc_age,62,0.046415888336127774,test,0.5,0.06659401860234497,0.49849206349206354,0.06464129008748927,0.4965659340659341,0.06627537590077698 +flat_mae,patch,logistic,aabc_age,63,0.005994842503189409,train,0.6732283464566929,0.020148018510148826,0.6727555726793532,0.02023863081667139,0.6737192623933379,0.02016804508909663 +flat_mae,patch,logistic,aabc_age,63,0.005994842503189409,test,0.36538461538461536,0.0510490395391512,0.32669343538908757,0.04628185412564457,0.36561355311355315,0.05119345013214872 +flat_mae,patch,logistic,aabc_age,64,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,64,21.54434690031882,test,0.38461538461538464,0.06614865811499279,0.38164191841611195,0.06590657109001002,0.38118131868131866,0.06593314902413683 +flat_mae,patch,logistic,aabc_age,65,0.046415888336127774,train,0.8228346456692913,0.01668468263879386,0.8221986848492873,0.016899358794317688,0.8239609725176404,0.01663383456551384 +flat_mae,patch,logistic,aabc_age,65,0.046415888336127774,test,0.5769230769230769,0.06556689524050449,0.5742753623188406,0.06621427182592991,0.5753205128205129,0.0656954316976289 +flat_mae,patch,logistic,aabc_age,66,0.005994842503189409,train,0.6594488188976378,0.02009653218197637,0.6570219562662738,0.020554997629598396,0.6597915615787828,0.020175238115245596 +flat_mae,patch,logistic,aabc_age,66,0.005994842503189409,test,0.4807692307692308,0.06521783026608126,0.48236263736263735,0.06513602506729406,0.4835164835164835,0.06555535138089703 +flat_mae,patch,logistic,aabc_age,67,0.005994842503189409,train,0.6633858267716536,0.02014964168455907,0.6623561706181802,0.02034836405286549,0.6631035081292471,0.020127775401934743 +flat_mae,patch,logistic,aabc_age,67,0.005994842503189409,test,0.5384615384615384,0.05997873143750266,0.5128019323671498,0.06246970640210205,0.532051282051282,0.05961990969326364 +flat_mae,patch,logistic,aabc_age,68,0.005994842503189409,train,0.655511811023622,0.021267369256727325,0.65402781217947,0.021378162503896393,0.656026858065667,0.02126553976603494 +flat_mae,patch,logistic,aabc_age,68,0.005994842503189409,test,0.5192307692307693,0.06370321116259396,0.506328320802005,0.06571401521041073,0.5157967032967032,0.06363553741328075 +flat_mae,patch,logistic,aabc_age,69,0.000774263682681127,train,0.5748031496062992,0.020479124817460007,0.5670670016967446,0.021142810312069323,0.5743239452692711,0.02045867032035232 +flat_mae,patch,logistic,aabc_age,69,0.000774263682681127,test,0.36538461538461536,0.05847513758834888,0.328125,0.050370172453118096,0.3585164835164835,0.057066394320170855 +flat_mae,patch,logistic,aabc_age,70,0.046415888336127774,train,0.8366141732283464,0.01647056266673575,0.8363548277798215,0.016713831423822358,0.8379062945324891,0.0163629376597629 +flat_mae,patch,logistic,aabc_age,70,0.046415888336127774,test,0.4807692307692308,0.0721678168367469,0.48115384615384615,0.07236177232271801,0.48511904761904756,0.07254209348425958 +flat_mae,patch,logistic,aabc_age,71,0.000774263682681127,train,0.5669291338582677,0.020965509941454175,0.5596125752899224,0.021298576496122955,0.5655567388264808,0.020948474529526813 +flat_mae,patch,logistic,aabc_age,71,0.000774263682681127,test,0.5,0.062849719793381,0.4951258419000354,0.06294241909834386,0.4965659340659341,0.06257934154719906 +flat_mae,patch,logistic,aabc_age,72,0.3593813663804626,train,0.9724409448818898,0.007751611585661705,0.9726644986078075,0.00769440309285248,0.9729296408247968,0.0076456039833256864 +flat_mae,patch,logistic,aabc_age,72,0.3593813663804626,test,0.5961538461538461,0.06136122313107858,0.5867532467532468,0.06418117339407509,0.592948717948718,0.06138665671038226 +flat_mae,patch,logistic,aabc_age,73,0.000774263682681127,train,0.5787401574803149,0.020927445231464063,0.5739480629771081,0.02142051165800311,0.5786061366848936,0.02096203710890515 +flat_mae,patch,logistic,aabc_age,73,0.000774263682681127,test,0.5384615384615384,0.06053073941561579,0.5286150630978217,0.059657052042307306,0.5336538461538461,0.060177059922395186 +flat_mae,patch,logistic,aabc_age,74,0.000774263682681127,train,0.5728346456692913,0.02144645822510422,0.5653216055277417,0.022219486359718117,0.5718726804746552,0.021366552173906723 +flat_mae,patch,logistic,aabc_age,74,0.000774263682681127,test,0.5384615384615384,0.06420210092601028,0.5211137060283171,0.0672598171914319,0.5322802197802198,0.06391657247008882 +flat_mae,patch,logistic,aabc_age,75,0.005994842503189409,train,0.655511811023622,0.019978075201254445,0.6536407742238131,0.02014277953126167,0.6561944392766246,0.02001008233987747 +flat_mae,patch,logistic,aabc_age,75,0.005994842503189409,test,0.5384615384615384,0.06788870111914773,0.5415745544777802,0.06872070420282618,0.5412087912087912,0.06813088263859277 +flat_mae,patch,logistic,aabc_age,76,0.046415888336127774,train,0.8248031496062992,0.016439302022935823,0.8242791606119538,0.016611220159948016,0.8250568433549614,0.016521713107192637 +flat_mae,patch,logistic,aabc_age,76,0.046415888336127774,test,0.5192307692307693,0.06500814696326633,0.5226070226070225,0.06434953042249686,0.5233516483516484,0.06540019331689348 +flat_mae,patch,logistic,aabc_age,77,0.046415888336127774,train,0.8149606299212598,0.01723915004797476,0.8147008923338181,0.01739558815278067,0.8154113339560289,0.017259161779078373 +flat_mae,patch,logistic,aabc_age,77,0.046415888336127774,test,0.4807692307692308,0.0685245604481735,0.49117778772951187,0.06549190072806993,0.4832875457875458,0.06886195000867194 +flat_mae,patch,logistic,aabc_age,78,0.046415888336127774,train,0.812992125984252,0.017219348637853137,0.8120248155623895,0.01743599307708797,0.8142154897782653,0.01708901526559648 +flat_mae,patch,logistic,aabc_age,78,0.046415888336127774,test,0.5576923076923077,0.06854656560911518,0.5700773411371237,0.06787105757724143,0.5592948717948718,0.06873593959157892 +flat_mae,patch,logistic,aabc_age,79,0.046415888336127774,train,0.8149606299212598,0.017928172057929,0.8153197282017547,0.017950592653423213,0.8170842691349453,0.017801244858020725 +flat_mae,patch,logistic,aabc_age,79,0.046415888336127774,test,0.4423076923076923,0.06374054054427009,0.4184373900725402,0.06460209767844857,0.43864468864468864,0.06320186158034441 +flat_mae,patch,logistic,aabc_age,80,0.046415888336127774,train,0.8464566929133859,0.01574370954472868,0.8468514588727359,0.01577027006050886,0.8475841694013491,0.015699178596620622 +flat_mae,patch,logistic,aabc_age,80,0.046415888336127774,test,0.5,0.05347846992540204,0.46475367161866016,0.05790999732145135,0.5020604395604396,0.05394009386315277 +flat_mae,patch,logistic,aabc_age,81,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,81,166.81005372000556,test,0.4423076923076923,0.06030465062973141,0.43522764052217067,0.057761096469901,0.4358974358974359,0.05983299128177099 +flat_mae,patch,logistic,aabc_age,82,0.046415888336127774,train,0.8307086614173228,0.015555154979160218,0.8303711057895382,0.015730585920053563,0.831857907435715,0.015457897510049234 +flat_mae,patch,logistic,aabc_age,82,0.046415888336127774,test,0.5,0.0669303599043295,0.49064558629776023,0.07045954211844949,0.5011446886446886,0.06711284997165604 +flat_mae,patch,logistic,aabc_age,83,0.005994842503189409,train,0.6830708661417323,0.02117017068547756,0.68252981416498,0.021281949120766964,0.6839998542355132,0.02112481009538388 +flat_mae,patch,logistic,aabc_age,83,0.005994842503189409,test,0.4807692307692308,0.06501706648958597,0.4654459370810873,0.06773652766245995,0.4787087912087912,0.06491104491994917 +flat_mae,patch,logistic,aabc_age,84,0.005994842503189409,train,0.6515748031496063,0.020895092084261,0.6505056561945634,0.02114112722148123,0.652964844866309,0.020872570591040005 +flat_mae,patch,logistic,aabc_age,84,0.005994842503189409,test,0.5576923076923077,0.06094308140878366,0.5439673229995811,0.06396480555849382,0.553113553113553,0.060655109924712874 +flat_mae,patch,logistic,aabc_age,85,0.046415888336127774,train,0.8070866141732284,0.018448886110624597,0.8068749946560222,0.018621922993975623,0.8088697929952492,0.01832492937179889 +flat_mae,patch,logistic,aabc_age,85,0.046415888336127774,test,0.46153846153846156,0.06344045104192093,0.45228937728937735,0.06318091578482546,0.459478021978022,0.06326273236164247 +flat_mae,patch,logistic,aabc_age,86,0.3593813663804626,train,0.9803149606299213,0.006320554398058439,0.9804735449959116,0.006273786352062454,0.9806090078616925,0.00624447091046411 +flat_mae,patch,logistic,aabc_age,86,0.3593813663804626,test,0.5576923076923077,0.06303106916809656,0.5276009588788494,0.06490237937393359,0.5485347985347986,0.06257902372305989 +flat_mae,patch,logistic,aabc_age,87,0.046415888336127774,train,0.8366141732283464,0.015734559197915485,0.8360185091473314,0.01582875469300564,0.8372535908889525,0.01567820906937241 +flat_mae,patch,logistic,aabc_age,87,0.046415888336127774,test,0.5,0.06421206544811049,0.4874231950844854,0.06589868350070553,0.5038919413919414,0.06468712386018521 +flat_mae,patch,logistic,aabc_age,88,0.000774263682681127,train,0.5669291338582677,0.02040671460250281,0.5603753956618553,0.021002758994751233,0.5673972552163549,0.020384433145836576 +flat_mae,patch,logistic,aabc_age,88,0.000774263682681127,test,0.4807692307692308,0.06385415844168535,0.4648477583537006,0.06598395516900843,0.47870879120879123,0.06382203050041937 +flat_mae,patch,logistic,aabc_age,89,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,89,166.81005372000556,test,0.38461538461538464,0.06190630446525119,0.3862524943087603,0.06275844331213103,0.38118131868131866,0.06169263015593813 +flat_mae,patch,logistic,aabc_age,90,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,90,21.54434690031882,test,0.5192307692307693,0.065074593066126,0.5122235757121439,0.06764844005954385,0.5160256410256411,0.06507407745732914 +flat_mae,patch,logistic,aabc_age,91,0.005994842503189409,train,0.6771653543307087,0.02074418377864796,0.6755343042024673,0.020930774996387812,0.6791069144151488,0.020584848947051683 +flat_mae,patch,logistic,aabc_age,91,0.005994842503189409,test,0.4230769230769231,0.06508706049428704,0.4147537647537648,0.06738860486029774,0.4226190476190476,0.06520289701117521 +flat_mae,patch,logistic,aabc_age,92,0.046415888336127774,train,0.8188976377952756,0.017422766072169908,0.8186292555625713,0.017570404443498437,0.8201462823343032,0.017308970302441013 +flat_mae,patch,logistic,aabc_age,92,0.046415888336127774,test,0.5769230769230769,0.05971107704516383,0.5569147244559538,0.06472420010160243,0.579441391941392,0.05991358625551488 +flat_mae,patch,logistic,aabc_age,93,0.005994842503189409,train,0.6633858267716536,0.02047102470873663,0.6617344357005468,0.020750662475837553,0.6643589287460994,0.020499706561980066 +flat_mae,patch,logistic,aabc_age,93,0.005994842503189409,test,0.4423076923076923,0.06516587846393487,0.44333333333333336,0.06437384724700111,0.44047619047619047,0.06525937070170837 +flat_mae,patch,logistic,aabc_age,94,0.000774263682681127,train,0.5866141732283464,0.0210431373047665,0.5800298735083146,0.021737709344189508,0.5857004079487675,0.021001024867413547 +flat_mae,patch,logistic,aabc_age,94,0.000774263682681127,test,0.38461538461538464,0.06466943709879155,0.3750158413616158,0.06389356228751095,0.38095238095238093,0.06434190217932076 +flat_mae,patch,logistic,aabc_age,95,0.005994842503189409,train,0.6732283464566929,0.019988494117361467,0.6717203622022899,0.020179246230875268,0.6746895072584961,0.019909750161679902 +flat_mae,patch,logistic,aabc_age,95,0.005994842503189409,test,0.5192307692307693,0.06123109537988127,0.48295019157088126,0.05771678202255695,0.5098443223443223,0.06015470265674004 +flat_mae,patch,logistic,aabc_age,96,0.046415888336127774,train,0.8248031496062992,0.01711319959046818,0.8235980341015746,0.017404889688624473,0.8245717209223823,0.017112740750762537 +flat_mae,patch,logistic,aabc_age,96,0.046415888336127774,test,0.5576923076923077,0.06576986054579267,0.5655340532498727,0.06595349689669233,0.5608974358974359,0.06587028148696997 +flat_mae,patch,logistic,aabc_age,97,0.005994842503189409,train,0.6594488188976378,0.021136555254060118,0.6579345571400488,0.021442863648752274,0.6598415482490041,0.021170599393184445 +flat_mae,patch,logistic,aabc_age,97,0.005994842503189409,test,0.4230769230769231,0.06320067597571234,0.4230331262939959,0.06148754494921732,0.4223901098901099,0.06314911870028754 +flat_mae,patch,logistic,aabc_age,98,0.046415888336127774,train,0.8248031496062992,0.016240754462900145,0.824572763885817,0.016365614789038464,0.8246217075926037,0.016244329603054986 +flat_mae,patch,logistic,aabc_age,98,0.046415888336127774,test,0.4807692307692308,0.06244292068117303,0.465752688172043,0.06522002043118817,0.48168498168498164,0.06307552338371422 +flat_mae,patch,logistic,aabc_age,99,0.3593813663804626,train,0.9862204724409449,0.004934275701068324,0.9861727927222067,0.004970884096801166,0.9859547046447087,0.005054165603587472 +flat_mae,patch,logistic,aabc_age,99,0.3593813663804626,test,0.5192307692307693,0.06849665261980575,0.5089999444722083,0.07014339556876967,0.5233516483516484,0.06895745994264145 +flat_mae,patch,logistic,aabc_age,100,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,100,166.81005372000556,test,0.36538461538461536,0.06426420134075034,0.3691931216931217,0.06309550935143939,0.3676739926739927,0.06460448611761586 diff --git a/data_scaling/n1600_2/eval_v2/aabc_age__patch__logistic/log.txt b/data_scaling/n1600_2/eval_v2/aabc_age__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..5e8fcc2a51cc80ffc0b8725942b1232c81bafa63 --- /dev/null +++ b/data_scaling/n1600_2/eval_v2/aabc_age__patch__logistic/log.txt @@ -0,0 +1,245 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: 58906bf7243fb545e1349221e6921a1797e2e666, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-26 17:27:40 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n1600_2; eval v2 (aabc_age patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n1600_2/eval_v2/aabc_age__patch__logistic +model: flat_mae +representation: patch +dataset: aabc_age +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/aabc_age__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: aabc_age (flat) +train (n=455): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 471 +}), + labels=[0 1 2 3], + counts=[110 127 109 109] +) + +validation (n=53): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 58 +}), + labels=[0 1 2 3], + counts=[14 13 12 14] +) + +test (n=52): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 55 +}), + labels=[0 1 2 3], + counts=[13 13 12 14] +) + +extracting features for all splits +extract (train) [ 0/228] eta: 0:16:38 time: 4.3774 data: 3.7520 max mem: 3205 +extract (train) [ 20/228] eta: 0:01:29 time: 0.2336 data: 0.0760 max mem: 3393 +extract (train) [ 40/228] eta: 0:01:00 time: 0.2085 data: 0.0632 max mem: 3393 +extract (train) [ 60/228] eta: 0:00:47 time: 0.2076 data: 0.0635 max mem: 3393 +extract (train) [ 80/228] eta: 0:00:38 time: 0.1860 data: 0.0563 max mem: 3393 +extract (train) [100/228] eta: 0:00:31 time: 0.2013 data: 0.0608 max mem: 3393 +extract (train) [120/228] eta: 0:00:26 time: 0.2009 data: 0.0619 max mem: 3393 +extract (train) [140/228] eta: 0:00:20 time: 0.1883 data: 0.0543 max mem: 3393 +extract (train) [160/228] eta: 0:00:15 time: 0.1919 data: 0.0583 max mem: 3393 +extract (train) [180/228] eta: 0:00:10 time: 0.1922 data: 0.0585 max mem: 3393 +extract (train) [200/228] eta: 0:00:06 time: 0.1643 data: 0.0436 max mem: 3393 +extract (train) [220/228] eta: 0:00:01 time: 0.1620 data: 0.0425 max mem: 3393 +extract (train) [227/228] eta: 0:00:00 time: 0.1613 data: 0.0443 max mem: 3393 +extract (train) Total time: 0:00:48 (0.2137 s / it) +extract (validation) [ 0/27] eta: 0:01:47 time: 3.9669 data: 3.8189 max mem: 3393 +extract (validation) [20/27] eta: 0:00:02 time: 0.1638 data: 0.0430 max mem: 3393 +extract (validation) [26/27] eta: 0:00:00 time: 0.1521 data: 0.0392 max mem: 3393 +extract (validation) Total time: 0:00:08 (0.3125 s / it) +extract (test) [ 0/26] eta: 0:01:45 time: 4.0503 data: 3.9172 max mem: 3393 +extract (test) [20/26] eta: 0:00:02 time: 0.1758 data: 0.0475 max mem: 3393 +extract (test) [25/26] eta: 0:00:00 time: 0.1623 data: 0.0414 max mem: 3393 +extract (test) Total time: 0:00:08 (0.3332 s / it) +feature extraction time: 0:01:05 +train features: (455, 768) +validation features: (53, 768) +test features: (52, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|--------:|:--------|--------:|----------:|--------:|----------:|--------:|-----------:| +| flat_mae | patch | logistic | aabc_age | | 0.35938 | train | 0.98622 | 0.005111 | 0.98611 | 0.0051752 | 0.98605 | 0.0051955 | +| flat_mae | patch | logistic | aabc_age | | 0.35938 | test | 0.36538 | 0.064638 | 0.35789 | 0.066143 | 0.3592 | 0.064055 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 1, "C": 0.046415888336127774, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.05923919770201736, "f1": 0.3821428571428571, "f1_std": 0.05569598720201474, "bacc": 0.3999542124542125, "bacc_std": 0.05842530455042719} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 2, "C": 0.000774263682681127, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.06730465927232165, "f1": 0.5487319832147419, "f1_std": 0.06830475126908966, "bacc": 0.5544871794871795, "bacc_std": 0.06701709548305267} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 3, "C": 0.046415888336127774, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06840614656281967, "f1": 0.5163773008600595, "f1_std": 0.06984540957685567, "bacc": 0.5190018315018314, "bacc_std": 0.06849659545754062} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 4, "C": 0.005994842503189409, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.06562246332093678, "f1": 0.5523809523809524, "f1_std": 0.06700780543533107, "bacc": 0.5604395604395604, "bacc_std": 0.065973480568854} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 5, "C": 166.81005372000556, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06046780159378873, "f1": 0.45200632281482345, "f1_std": 0.062483684613445604, "bacc": 0.459478021978022, "bacc_std": 0.06053303773479253} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 6, "C": 0.005994842503189409, "split": "test", "acc": 0.5769230769230769, "acc_std": 0.06970541352403135, "f1": 0.5792420482743064, "f1_std": 0.07097636604623843, "bacc": 0.5755494505494505, "bacc_std": 0.06982749284434343} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 7, "C": 0.000774263682681127, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.060108518727952684, "f1": 0.44232843137254907, "f1_std": 0.058497407050775795, "bacc": 0.47275641025641024, "bacc_std": 0.05909405429950046} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 8, "C": 0.005994842503189409, "split": "test", "acc": 0.5, "acc_std": 0.06379139217775648, "f1": 0.5002289377289377, "f1_std": 0.06342076763413294, "bacc": 0.5086996336996337, "bacc_std": 0.06436634498093523} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 9, "C": 0.046415888336127774, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06772665212664822, "f1": 0.4751602564102564, "f1_std": 0.0690917504977582, "bacc": 0.4773351648351648, "bacc_std": 0.06759717781794651} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 10, "C": 0.005994842503189409, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06483364009196126, "f1": 0.4505622091828988, "f1_std": 0.0645964340258125, "bacc": 0.459478021978022, "bacc_std": 0.06479925259651881} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 11, "C": 0.005994842503189409, "split": "test", "acc": 0.5961538461538461, "acc_std": 0.06526572902079149, "f1": 0.5987192118226601, "f1_std": 0.06653110153693226, "bacc": 0.5961538461538461, "bacc_std": 0.06525177740239879} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 12, "C": 0.005994842503189409, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06344153530902971, "f1": 0.4365231259968102, "f1_std": 0.06446776624753814, "bacc": 0.44207875457875456, "bacc_std": 0.06348665597918453} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 13, "C": 0.005994842503189409, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06617378948043691, "f1": 0.47477703455964326, "f1_std": 0.06663504550759104, "bacc": 0.4832875457875458, "bacc_std": 0.0667194637367675} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 14, "C": 0.046415888336127774, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06245020501574734, "f1": 0.5211675579322638, "f1_std": 0.06797898800677389, "bacc": 0.5425824175824177, "bacc_std": 0.0630168816461007} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 15, "C": 21.54434690031882, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06746680095253688, "f1": 0.44543956043956046, "f1_std": 0.06700534931716601, "bacc": 0.44184981684981683, "bacc_std": 0.06734593412883222} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 16, "C": 0.005994842503189409, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06401131325673666, "f1": 0.5162711154544186, "f1_std": 0.06528514077777343, "bacc": 0.5173992673992673, "bacc_std": 0.06410622270269783} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 17, "C": 0.000774263682681127, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.059532933056781236, "f1": 0.37399436090225563, "f1_std": 0.057598321019142494, "bacc": 0.3999542124542125, "bacc_std": 0.05853471994664114} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 18, "C": 0.005994842503189409, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.0650367556524679, "f1": 0.5012971562894164, "f1_std": 0.06522984408882443, "bacc": 0.5114468864468865, "bacc_std": 0.06454451991620425} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 19, "C": 0.005994842503189409, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06824870764105762, "f1": 0.4642567110309046, "f1_std": 0.06867922738477522, "bacc": 0.46130952380952384, "bacc_std": 0.06859915202684563} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 20, "C": 0.3593813663804626, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.05485185804839201, "f1": 0.4559721887308094, "f1_std": 0.05222298327283031, "bacc": 0.47687728937728935, "bacc_std": 0.05403671129456438} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 21, "C": 2.782559402207126, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.061978761393765576, "f1": 0.3850182411674347, "f1_std": 0.06366581369849365, "bacc": 0.39880952380952384, "bacc_std": 0.061574328249956914} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 22, "C": 0.046415888336127774, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06876203498749366, "f1": 0.4741048593350384, "f1_std": 0.06795864807600198, "bacc": 0.46291208791208793, "bacc_std": 0.06892648119723843} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 23, "C": 2.782559402207126, "split": "test", "acc": 0.5, "acc_std": 0.056795282744360326, "f1": 0.4910714285714286, "f1_std": 0.05895481597414775, "bacc": 0.49954212454212454, "bacc_std": 0.0570237074833746} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 24, "C": 0.005994842503189409, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.06594335923517193, "f1": 0.3864994425863991, "f1_std": 0.06511280282532958, "bacc": 0.3839285714285714, "bacc_std": 0.06589713106690832} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 25, "C": 0.005994842503189409, "split": "test", "acc": 0.3076923076923077, "acc_std": 0.059864294264700475, "f1": 0.30356002554278416, "f1_std": 0.05814421154858443, "bacc": 0.30815018315018317, "bacc_std": 0.05998259139173375} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 26, "C": 0.046415888336127774, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06847990246357043, "f1": 0.46543040293040294, "f1_std": 0.06603380182279563, "bacc": 0.46543040293040294, "bacc_std": 0.06896882177063035} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 27, "C": 2.782559402207126, "split": "test", "acc": 0.3269230769230769, "acc_std": 0.058104310913610895, "f1": 0.3076765188834154, "f1_std": 0.05813108157155638, "bacc": 0.326007326007326, "bacc_std": 0.057818611182935686} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 28, "C": 0.005994842503189409, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.0670802310405212, "f1": 0.4807407407407407, "f1_std": 0.0665760551078589, "bacc": 0.4789377289377289, "bacc_std": 0.0670969291722002} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 29, "C": 0.046415888336127774, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.0638910291783397, "f1": 0.43980519480519475, "f1_std": 0.06674539385704154, "bacc": 0.44024725274725274, "bacc_std": 0.06405740090570425} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 30, "C": 0.005994842503189409, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06488007826916531, "f1": 0.5234126984126983, "f1_std": 0.06119566832942112, "bacc": 0.5247252747252747, "bacc_std": 0.06520531567246199} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 31, "C": 0.046415888336127774, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.06430524240824718, "f1": 0.54683908045977, "f1_std": 0.06573199383453367, "bacc": 0.5544871794871795, "bacc_std": 0.06408797682664334} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 32, "C": 0.046415888336127774, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06288991979510167, "f1": 0.43863346104725415, "f1_std": 0.06359884722091162, "bacc": 0.4416208791208791, "bacc_std": 0.06302106181697256} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 33, "C": 0.000774263682681127, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.05546280538401469, "f1": 0.39299242424242425, "f1_std": 0.04666623187044457, "bacc": 0.4340659340659341, "bacc_std": 0.05412728667374261} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 34, "C": 0.005994842503189409, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06825036575752179, "f1": 0.45478543195934495, "f1_std": 0.06957173577803566, "bacc": 0.44207875457875456, "bacc_std": 0.06853747495547344} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 35, "C": 0.046415888336127774, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.06389445577896365, "f1": 0.3663961038961039, "f1_std": 0.06209892121098172, "bacc": 0.36744505494505497, "bacc_std": 0.06427161220333454} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 36, "C": 0.046415888336127774, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.061182564616218495, "f1": 0.44024699808904477, "f1_std": 0.06299532871827582, "bacc": 0.46222527472527475, "bacc_std": 0.061261740296321116} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 37, "C": 0.005994842503189409, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06588472764820467, "f1": 0.47777777777777775, "f1_std": 0.06814378584143704, "bacc": 0.4789377289377289, "bacc_std": 0.06594883326150192} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 38, "C": 0.000774263682681127, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.07081807469948623, "f1": 0.5329697106012896, "f1_std": 0.07317491575572407, "bacc": 0.5338827838827839, "bacc_std": 0.0708554841087877} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 39, "C": 2.782559402207126, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06865698118246066, "f1": 0.41166109253065775, "f1_std": 0.06784016599348276, "bacc": 0.4033882783882784, "bacc_std": 0.06882098726636592} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 40, "C": 0.046415888336127774, "split": "test", "acc": 0.5, "acc_std": 0.060999211848628394, "f1": 0.47914746543778797, "f1_std": 0.06413372051028102, "bacc": 0.49793956043956045, "bacc_std": 0.061073188170595684} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 41, "C": 0.046415888336127774, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.061458734411822695, "f1": 0.41131784881784883, "f1_std": 0.062236455696746816, "bacc": 0.4237637362637362, "bacc_std": 0.06178382669296002} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 42, "C": 0.000774263682681127, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06274390278965843, "f1": 0.4033994032395567, "f1_std": 0.06510499841712931, "bacc": 0.4283424908424909, "bacc_std": 0.06351567763967293} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 43, "C": 0.000774263682681127, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06592383986633284, "f1": 0.4362824675324676, "f1_std": 0.06662216777772453, "bacc": 0.43887362637362637, "bacc_std": 0.06571131729858465} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 44, "C": 0.005994842503189409, "split": "test", "acc": 0.5, "acc_std": 0.06773123880499435, "f1": 0.49181547619047616, "f1_std": 0.07042791099656556, "bacc": 0.49404761904761907, "bacc_std": 0.0677969807819352} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 45, "C": 0.046415888336127774, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06436830047864302, "f1": 0.540994623655914, "f1_std": 0.06585721628860068, "bacc": 0.5428113553113553, "bacc_std": 0.06463631016361242} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 46, "C": 0.046415888336127774, "split": "test", "acc": 0.5, "acc_std": 0.06718886213701102, "f1": 0.5046620046620047, "f1_std": 0.06782734387856687, "bacc": 0.5059523809523809, "bacc_std": 0.06713380102702346} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 47, "C": 0.000774263682681127, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.06327927205799186, "f1": 0.3704415073569206, "f1_std": 0.06346109201885287, "bacc": 0.3823260073260073, "bacc_std": 0.06276503997570618} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 48, "C": 0.005994842503189409, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06408320220411685, "f1": 0.47827903091060986, "f1_std": 0.06565823908249702, "bacc": 0.48031135531135527, "bacc_std": 0.06411641006928165} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 49, "C": 9.999999999999999e-05, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.05841104854148723, "f1": 0.401328320802005, "f1_std": 0.05882507737576725, "bacc": 0.4164377289377289, "bacc_std": 0.05764627424432652} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 50, "C": 0.005994842503189409, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.06320222076841266, "f1": 0.3848007246376812, "f1_std": 0.06351056070885817, "bacc": 0.38278388278388276, "bacc_std": 0.06332834729307044} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 51, "C": 0.005994842503189409, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.060528686533668684, "f1": 0.40366653031011374, "f1_std": 0.06167128787639694, "bacc": 0.4194139194139194, "bacc_std": 0.05988501541514865} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 52, "C": 0.046415888336127774, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06487670372630328, "f1": 0.4349655529378168, "f1_std": 0.06655303647135749, "bacc": 0.4448260073260073, "bacc_std": 0.06491580068985833} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 53, "C": 9.999999999999999e-05, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.05868725404193186, "f1": 0.49271206690561525, "f1_std": 0.057744236978098086, "bacc": 0.5290750915750916, "bacc_std": 0.05770404579989048} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 54, "C": 0.005994842503189409, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06300065745876535, "f1": 0.46779812637105767, "f1_std": 0.0648231564668786, "bacc": 0.48031135531135527, "bacc_std": 0.0629677853988796} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 55, "C": 0.005994842503189409, "split": "test", "acc": 0.5769230769230769, "acc_std": 0.06023301646264571, "f1": 0.5733516483516483, "f1_std": 0.06160115460403302, "bacc": 0.5810439560439561, "bacc_std": 0.06063064244856499} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 56, "C": 0.000774263682681127, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06090387955113121, "f1": 0.38267682781614676, "f1_std": 0.061091420333888626, "bacc": 0.40293040293040294, "bacc_std": 0.06088914447362108} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 57, "C": 0.046415888336127774, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06540509860153812, "f1": 0.4822222222222222, "f1_std": 0.06342096999747539, "bacc": 0.4816849816849817, "bacc_std": 0.06554600760403767} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 58, "C": 0.000774263682681127, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.05445234331328352, "f1": 0.3639393939393939, "f1_std": 0.04753937933158865, "bacc": 0.3983516483516484, "bacc_std": 0.05317522436410746} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 59, "C": 0.005994842503189409, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06723450966535584, "f1": 0.5182919254658385, "f1_std": 0.06808638282870305, "bacc": 0.5222069597069597, "bacc_std": 0.06712089745563525} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 60, "C": 0.005994842503189409, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.0596746107787695, "f1": 0.3719087425983978, "f1_std": 0.06217334274404628, "bacc": 0.3823260073260073, "bacc_std": 0.05933683029543273} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 61, "C": 0.046415888336127774, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06489892004097762, "f1": 0.43480519480519486, "f1_std": 0.06462193816307538, "bacc": 0.42422161172161177, "bacc_std": 0.06494660844909432} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 62, "C": 0.046415888336127774, "split": "test", "acc": 0.5, "acc_std": 0.06659401860234497, "f1": 0.49849206349206354, "f1_std": 0.06464129008748927, "bacc": 0.4965659340659341, "bacc_std": 0.06627537590077698} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 63, "C": 0.005994842503189409, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.0510490395391512, "f1": 0.32669343538908757, "f1_std": 0.04628185412564457, "bacc": 0.36561355311355315, "bacc_std": 0.05119345013214872} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 64, "C": 21.54434690031882, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.06614865811499279, "f1": 0.38164191841611195, "f1_std": 0.06590657109001002, "bacc": 0.38118131868131866, "bacc_std": 0.06593314902413683} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 65, "C": 0.046415888336127774, "split": "test", "acc": 0.5769230769230769, "acc_std": 0.06556689524050449, "f1": 0.5742753623188406, "f1_std": 0.06621427182592991, "bacc": 0.5753205128205129, "bacc_std": 0.0656954316976289} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 66, "C": 0.005994842503189409, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06521783026608126, "f1": 0.48236263736263735, "f1_std": 0.06513602506729406, "bacc": 0.4835164835164835, "bacc_std": 0.06555535138089703} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 67, "C": 0.005994842503189409, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.05997873143750266, "f1": 0.5128019323671498, "f1_std": 0.06246970640210205, "bacc": 0.532051282051282, "bacc_std": 0.05961990969326364} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 68, "C": 0.005994842503189409, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06370321116259396, "f1": 0.506328320802005, "f1_std": 0.06571401521041073, "bacc": 0.5157967032967032, "bacc_std": 0.06363553741328075} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 69, "C": 0.000774263682681127, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.05847513758834888, "f1": 0.328125, "f1_std": 0.050370172453118096, "bacc": 0.3585164835164835, "bacc_std": 0.057066394320170855} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 70, "C": 0.046415888336127774, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.0721678168367469, "f1": 0.48115384615384615, "f1_std": 0.07236177232271801, "bacc": 0.48511904761904756, "bacc_std": 0.07254209348425958} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 71, "C": 0.000774263682681127, "split": "test", "acc": 0.5, "acc_std": 0.062849719793381, "f1": 0.4951258419000354, "f1_std": 0.06294241909834386, "bacc": 0.4965659340659341, "bacc_std": 0.06257934154719906} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 72, "C": 0.3593813663804626, "split": "test", "acc": 0.5961538461538461, "acc_std": 0.06136122313107858, "f1": 0.5867532467532468, "f1_std": 0.06418117339407509, "bacc": 0.592948717948718, "bacc_std": 0.06138665671038226} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 73, "C": 0.000774263682681127, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06053073941561579, "f1": 0.5286150630978217, "f1_std": 0.059657052042307306, "bacc": 0.5336538461538461, "bacc_std": 0.060177059922395186} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 74, "C": 0.000774263682681127, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06420210092601028, "f1": 0.5211137060283171, "f1_std": 0.0672598171914319, "bacc": 0.5322802197802198, "bacc_std": 0.06391657247008882} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 75, "C": 0.005994842503189409, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06788870111914773, "f1": 0.5415745544777802, "f1_std": 0.06872070420282618, "bacc": 0.5412087912087912, "bacc_std": 0.06813088263859277} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 76, "C": 0.046415888336127774, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06500814696326633, "f1": 0.5226070226070225, "f1_std": 0.06434953042249686, "bacc": 0.5233516483516484, "bacc_std": 0.06540019331689348} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 77, "C": 0.046415888336127774, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.0685245604481735, "f1": 0.49117778772951187, "f1_std": 0.06549190072806993, "bacc": 0.4832875457875458, "bacc_std": 0.06886195000867194} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 78, "C": 0.046415888336127774, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.06854656560911518, "f1": 0.5700773411371237, "f1_std": 0.06787105757724143, "bacc": 0.5592948717948718, "bacc_std": 0.06873593959157892} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 79, "C": 0.046415888336127774, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06374054054427009, "f1": 0.4184373900725402, "f1_std": 0.06460209767844857, "bacc": 0.43864468864468864, "bacc_std": 0.06320186158034441} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 80, "C": 0.046415888336127774, "split": "test", "acc": 0.5, "acc_std": 0.05347846992540204, "f1": 0.46475367161866016, "f1_std": 0.05790999732145135, "bacc": 0.5020604395604396, "bacc_std": 0.05394009386315277} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 81, "C": 166.81005372000556, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06030465062973141, "f1": 0.43522764052217067, "f1_std": 0.057761096469901, "bacc": 0.4358974358974359, "bacc_std": 0.05983299128177099} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 82, "C": 0.046415888336127774, "split": "test", "acc": 0.5, "acc_std": 0.0669303599043295, "f1": 0.49064558629776023, "f1_std": 0.07045954211844949, "bacc": 0.5011446886446886, "bacc_std": 0.06711284997165604} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 83, "C": 0.005994842503189409, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06501706648958597, "f1": 0.4654459370810873, "f1_std": 0.06773652766245995, "bacc": 0.4787087912087912, "bacc_std": 0.06491104491994917} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 84, "C": 0.005994842503189409, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.06094308140878366, "f1": 0.5439673229995811, "f1_std": 0.06396480555849382, "bacc": 0.553113553113553, "bacc_std": 0.060655109924712874} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 85, "C": 0.046415888336127774, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06344045104192093, "f1": 0.45228937728937735, "f1_std": 0.06318091578482546, "bacc": 0.459478021978022, "bacc_std": 0.06326273236164247} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 86, "C": 0.3593813663804626, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.06303106916809656, "f1": 0.5276009588788494, "f1_std": 0.06490237937393359, "bacc": 0.5485347985347986, "bacc_std": 0.06257902372305989} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 87, "C": 0.046415888336127774, "split": "test", "acc": 0.5, "acc_std": 0.06421206544811049, "f1": 0.4874231950844854, "f1_std": 0.06589868350070553, "bacc": 0.5038919413919414, "bacc_std": 0.06468712386018521} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 88, "C": 0.000774263682681127, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06385415844168535, "f1": 0.4648477583537006, "f1_std": 0.06598395516900843, "bacc": 0.47870879120879123, "bacc_std": 0.06382203050041937} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 89, "C": 166.81005372000556, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.06190630446525119, "f1": 0.3862524943087603, "f1_std": 0.06275844331213103, "bacc": 0.38118131868131866, "bacc_std": 0.06169263015593813} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 90, "C": 21.54434690031882, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.065074593066126, "f1": 0.5122235757121439, "f1_std": 0.06764844005954385, "bacc": 0.5160256410256411, "bacc_std": 0.06507407745732914} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 91, "C": 0.005994842503189409, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06508706049428704, "f1": 0.4147537647537648, "f1_std": 0.06738860486029774, "bacc": 0.4226190476190476, "bacc_std": 0.06520289701117521} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 92, "C": 0.046415888336127774, "split": "test", "acc": 0.5769230769230769, "acc_std": 0.05971107704516383, "f1": 0.5569147244559538, "f1_std": 0.06472420010160243, "bacc": 0.579441391941392, "bacc_std": 0.05991358625551488} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 93, "C": 0.005994842503189409, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06516587846393487, "f1": 0.44333333333333336, "f1_std": 0.06437384724700111, "bacc": 0.44047619047619047, "bacc_std": 0.06525937070170837} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 94, "C": 0.000774263682681127, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.06466943709879155, "f1": 0.3750158413616158, "f1_std": 0.06389356228751095, "bacc": 0.38095238095238093, "bacc_std": 0.06434190217932076} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 95, "C": 0.005994842503189409, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06123109537988127, "f1": 0.48295019157088126, "f1_std": 0.05771678202255695, "bacc": 0.5098443223443223, "bacc_std": 0.06015470265674004} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 96, "C": 0.046415888336127774, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.06576986054579267, "f1": 0.5655340532498727, "f1_std": 0.06595349689669233, "bacc": 0.5608974358974359, "bacc_std": 0.06587028148696997} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 97, "C": 0.005994842503189409, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06320067597571234, "f1": 0.4230331262939959, "f1_std": 0.06148754494921732, "bacc": 0.4223901098901099, "bacc_std": 0.06314911870028754} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 98, "C": 0.046415888336127774, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06244292068117303, "f1": 0.465752688172043, "f1_std": 0.06522002043118817, "bacc": 0.48168498168498164, "bacc_std": 0.06307552338371422} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 99, "C": 0.3593813663804626, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06849665261980575, "f1": 0.5089999444722083, "f1_std": 0.07014339556876967, "bacc": 0.5233516483516484, "bacc_std": 0.06895745994264145} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 100, "C": 166.81005372000556, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.06426420134075034, "f1": 0.3691931216931217, "f1_std": 0.06309550935143939, "bacc": 0.3676739926739927, "bacc_std": 0.06460448611761586} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | aabc_age | train | 100 | 7.4615 | 32.9 | 0.74874 | 0.1407 | 0.74672 | 0.14292 | 0.74908 | 0.14091 | +| flat_mae | patch | logistic | aabc_age | test | 100 | 7.4615 | 32.9 | 0.47385 | 0.063135 | 0.4644 | 0.065709 | 0.4727 | 0.063495 | + + +done! total time: 0:05:39 diff --git a/data_scaling/n1600_2/eval_v2/aabc_sex__patch__logistic/config.yaml b/data_scaling/n1600_2/eval_v2/aabc_sex__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1b5e1632f07262746f6971455e37d0a4f2749f90 --- /dev/null +++ b/data_scaling/n1600_2/eval_v2/aabc_sex__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n1600_2; eval v2 (aabc_sex patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n1600_2/eval_v2/aabc_sex__patch__logistic +model: flat_mae +representation: patch +dataset: aabc_sex +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/aabc_sex__patch__logistic +remote_dir: null diff --git a/data_scaling/n1600_2/eval_v2/aabc_sex__patch__logistic/eval_table.csv b/data_scaling/n1600_2/eval_v2/aabc_sex__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..3296d643c4e1fe251604251e0bdd170b62dc347a --- /dev/null +++ b/data_scaling/n1600_2/eval_v2/aabc_sex__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,aabc_sex,,0.3593813663804626,train,0.9924385633270322,0.0035931864326854005,0.9922570257611241,0.003679166409247739,0.9922570257611241,0.0037124576813549258 +flat_mae,patch,logistic,aabc_sex,,0.3593813663804626,test,0.8909090909090909,0.04151389690650152,0.8879076086956521,0.04243038508092483,0.8939393939393939,0.041469990366650715 +flat_mae,patch,logistic,aabc_sex,1,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,1,166.81005372000556,test,0.8,0.053450884228358524,0.7975911676145868,0.05380342899941615,0.8036684782608696,0.05328333909003386 +flat_mae,patch,logistic,aabc_sex,2,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,2,2.782559402207126,test,0.8727272727272727,0.044801770921314585,0.8683760683760684,0.04704920131178934,0.8661684782608696,0.047810608058988624 +flat_mae,patch,logistic,aabc_sex,3,0.3593813663804626,train,0.9924385633270322,0.0035504548556786783,0.9922477212110554,0.0036410453963016884,0.9922477212110554,0.003719298294189245 +flat_mae,patch,logistic,aabc_sex,3,0.3593813663804626,test,0.8363636363636363,0.049868269445817535,0.8281846581048247,0.053673058580491324,0.8226902173913043,0.05371832225504935 +flat_mae,patch,logistic,aabc_sex,4,0.3593813663804626,train,0.9924385633270322,0.003763476753808018,0.9922477212110554,0.003858930411145673,0.9922477212110554,0.003900458092516697 +flat_mae,patch,logistic,aabc_sex,4,0.3593813663804626,test,0.8363636363636363,0.049443100279910175,0.8354935194416749,0.04932763025388611,0.8471467391304348,0.04665864460056379 +flat_mae,patch,logistic,aabc_sex,5,0.046415888336127774,train,0.945179584120983,0.010235612563594492,0.9437615704675844,0.010507067181863847,0.9434918960110203,0.010653629945942187 +flat_mae,patch,logistic,aabc_sex,5,0.046415888336127774,test,0.8363636363636363,0.04955138913581792,0.8307692307692308,0.051788466151093455,0.8288043478260869,0.0519619909651812 +flat_mae,patch,logistic,aabc_sex,6,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,6,2.782559402207126,test,0.9454545454545454,0.032597297459343054,0.9435897435897436,0.033934850468303086,0.9408967391304348,0.03521115110421031 +flat_mae,patch,logistic,aabc_sex,7,0.3593813663804626,train,0.9905482041587902,0.00445238779331337,0.9903037190461352,0.004569098278855648,0.9900055687446877,0.004688985261360803 +flat_mae,patch,logistic,aabc_sex,7,0.3593813663804626,test,0.8909090909090909,0.04129961479019122,0.8863636363636364,0.04406140807088731,0.8817934782608696,0.045193321940994576 +flat_mae,patch,logistic,aabc_sex,8,0.046415888336127774,train,0.9527410207939508,0.009029131129734686,0.9515185952306762,0.00927427343441748,0.9512441747999649,0.009439822158657293 +flat_mae,patch,logistic,aabc_sex,8,0.046415888336127774,test,0.8,0.05186805893023259,0.795677136102668,0.052983078744054085,0.7975543478260869,0.0531022986078622 +flat_mae,patch,logistic,aabc_sex,9,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,9,21.54434690031882,test,0.8909090909090909,0.04117185912089803,0.8863636363636364,0.043619145845567094,0.8817934782608696,0.04459627950072285 +flat_mae,patch,logistic,aabc_sex,10,0.3593813663804626,train,0.9905482041587902,0.0043685525536722005,0.9903037190461352,0.004487320861353121,0.9900055687446877,0.004695403250802831 +flat_mae,patch,logistic,aabc_sex,10,0.3593813663804626,test,0.8727272727272727,0.04484779033041271,0.8683760683760684,0.0466842940400422,0.8661684782608696,0.04687993867729493 +flat_mae,patch,logistic,aabc_sex,11,0.046415888336127774,train,0.941398865784499,0.010330292430470504,0.9398080346491953,0.010626610096186052,0.9390075910782849,0.010799975078199962 +flat_mae,patch,logistic,aabc_sex,11,0.046415888336127774,test,0.9272727272727272,0.03636952679564093,0.9252717391304348,0.03746778687156803,0.9252717391304348,0.03765877233902594 +flat_mae,patch,logistic,aabc_sex,12,0.046415888336127774,train,0.9489603024574669,0.009570682181095039,0.9477656971705049,0.009786551973879865,0.9485843664820188,0.009795592702017198 +flat_mae,patch,logistic,aabc_sex,12,0.046415888336127774,test,0.8,0.05311793832502184,0.790003471017008,0.057460511757611854,0.7853260869565217,0.05701289687081716 +flat_mae,patch,logistic,aabc_sex,13,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,13,2.782559402207126,test,0.8909090909090909,0.042962309062712166,0.8891129032258065,0.04338884389912486,0.8940217391304348,0.042343867580525794 +flat_mae,patch,logistic,aabc_sex,14,0.046415888336127774,train,0.947069943289225,0.009596826810850619,0.9458625730994152,0.009784418861875463,0.9469503795539143,0.009632820722002333 +flat_mae,patch,logistic,aabc_sex,14,0.046415888336127774,test,0.9090909090909091,0.040123363487133794,0.9071259709557582,0.04098591837723501,0.9096467391304348,0.04061030841207201 +flat_mae,patch,logistic,aabc_sex,15,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,15,21.54434690031882,test,0.7818181818181819,0.05345771171822414,0.76890756302521,0.05847831469095084,0.7635869565217391,0.05744728200713319 +flat_mae,patch,logistic,aabc_sex,16,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,16,21.54434690031882,test,0.8727272727272727,0.047980381665774466,0.8663658451926415,0.052201839356174874,0.8600543478260869,0.053256339560986896 +flat_mae,patch,logistic,aabc_sex,17,0.046415888336127774,train,0.9395085066162571,0.010247336283891656,0.9379817696884434,0.01051817221491892,0.9379817696884434,0.010671753324546193 +flat_mae,patch,logistic,aabc_sex,17,0.046415888336127774,test,0.8909090909090909,0.03974969618886964,0.884453781512605,0.044023137653841256,0.8756793478260869,0.04503130792582813 +flat_mae,patch,logistic,aabc_sex,18,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,18,21.54434690031882,test,0.9090909090909091,0.03833382728152235,0.9071259709557582,0.03911589308652651,0.9096467391304348,0.038654286596477364 +flat_mae,patch,logistic,aabc_sex,19,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,19,21.54434690031882,test,0.8,0.05046679294429796,0.7931623931623932,0.052656448579322994,0.7914402173913043,0.05252342306217075 +flat_mae,patch,logistic,aabc_sex,20,0.046415888336127774,train,0.9357277882797732,0.010866241622223492,0.9341056302939711,0.011141039651108728,0.9341056302939711,0.011273044011943019 +flat_mae,patch,logistic,aabc_sex,20,0.046415888336127774,test,0.8727272727272727,0.04552127465571445,0.8711943793911007,0.04578711187544698,0.8783967391304348,0.04439328535357895 +flat_mae,patch,logistic,aabc_sex,21,0.046415888336127774,train,0.9546313799621928,0.00911567099986663,0.9535964912280701,0.009299641797509943,0.9547026583428588,0.009150974943658517 +flat_mae,patch,logistic,aabc_sex,21,0.046415888336127774,test,0.8,0.049832459798677155,0.7861435136090491,0.055746276005439875,0.7792119565217391,0.054371646597021796 +flat_mae,patch,logistic,aabc_sex,22,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,22,21.54434690031882,test,0.9272727272727272,0.03368841181933472,0.9260752688172043,0.03402031760445941,0.9313858695652174,0.032574051201196934 +flat_mae,patch,logistic,aabc_sex,23,0.005994842503189409,train,0.8941398865784499,0.013170166919727305,0.89091176903815,0.013642211367451439,0.8890354348017233,0.013933853023098209 +flat_mae,patch,logistic,aabc_sex,23,0.005994842503189409,test,0.8727272727272727,0.04682601523169933,0.8699763593380614,0.047817835710819635,0.8722826086956521,0.04766723642691978 +flat_mae,patch,logistic,aabc_sex,24,0.3593813663804626,train,0.9924385633270322,0.0038877110292885393,0.9922477212110554,0.003987284648168938,0.9922477212110554,0.0040638333289163674 +flat_mae,patch,logistic,aabc_sex,24,0.3593813663804626,test,0.8363636363636363,0.04940985949055138,0.8328267477203647,0.05046526822405365,0.8349184782608696,0.050403559162165976 +flat_mae,patch,logistic,aabc_sex,25,0.3593813663804626,train,0.9924385633270322,0.003729298727341673,0.9922477212110554,0.0038237244128849243,0.9922477212110554,0.003875442806608021 +flat_mae,patch,logistic,aabc_sex,25,0.3593813663804626,test,0.9454545454545454,0.030435212316662606,0.9442755825734549,0.03092647824872064,0.9470108695652174,0.029690384367641234 +flat_mae,patch,logistic,aabc_sex,26,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,26,2.782559402207126,test,0.8181818181818182,0.04847866021114616,0.8106060606060606,0.051597052806010305,0.8070652173913043,0.05156378485539002 +flat_mae,patch,logistic,aabc_sex,27,0.3593813663804626,train,0.994328922495274,0.0032240816841667477,0.9941893034853195,0.00329975800144141,0.9944898736774231,0.003136892516552721 +flat_mae,patch,logistic,aabc_sex,27,0.3593813663804626,test,0.8727272727272727,0.042876335726006895,0.8683760683760684,0.044835704108910086,0.8661684782608696,0.045187891208528626 +flat_mae,patch,logistic,aabc_sex,28,0.3593813663804626,train,0.9924385633270322,0.0036465317444938543,0.9922381665052675,0.0037486063037188446,0.9916395556727923,0.004051641815072514 +flat_mae,patch,logistic,aabc_sex,28,0.3593813663804626,test,0.8363636363636363,0.04564149192482608,0.8250265111346766,0.05200327821794475,0.8165760869565217,0.05159495698979989 +flat_mae,patch,logistic,aabc_sex,29,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,29,2.782559402207126,test,0.8363636363636363,0.04921163606269292,0.8307692307692308,0.051582795908121946,0.8288043478260869,0.05171295257956842 +flat_mae,patch,logistic,aabc_sex,30,0.3593813663804626,train,0.9924385633270322,0.003873660982776631,0.9922477212110554,0.003970451131084582,0.9922477212110554,0.003958846242182145 +flat_mae,patch,logistic,aabc_sex,30,0.3593813663804626,test,0.8727272727272727,0.04567497099294767,0.8711943793911007,0.04603273823889641,0.8783967391304348,0.0448009871161628 +flat_mae,patch,logistic,aabc_sex,31,0.005994842503189409,train,0.9035916824196597,0.012660080701798863,0.900308545034642,0.013209668015134833,0.8972053694422463,0.013547960518076989 +flat_mae,patch,logistic,aabc_sex,31,0.005994842503189409,test,0.8,0.051004209840831774,0.7931623931623932,0.053088832890884584,0.7914402173913043,0.05292184989758564 +flat_mae,patch,logistic,aabc_sex,32,0.046415888336127774,train,0.941398865784499,0.010405549995734327,0.9400965089477165,0.010610697373585143,0.9414402532313374,0.010502689901242234 +flat_mae,patch,logistic,aabc_sex,32,0.046415888336127774,test,0.8727272727272727,0.043579902866852475,0.8663658451926415,0.04710202620608417,0.8600543478260869,0.04795752350494955 +flat_mae,patch,logistic,aabc_sex,33,0.3593813663804626,train,0.9924385633270322,0.003769290858379831,0.9922477212110554,0.003863830131851633,0.9922477212110554,0.003901175167986353 +flat_mae,patch,logistic,aabc_sex,33,0.3593813663804626,test,0.9090909090909091,0.03950496987251224,0.905982905982906,0.041324773334540425,0.9035326086956521,0.0420820692146386 +flat_mae,patch,logistic,aabc_sex,34,0.3593813663804626,train,0.9924385633270322,0.0037467262067826276,0.9922570257611241,0.0038308015028386245,0.9928558867493186,0.003554408799980812 +flat_mae,patch,logistic,aabc_sex,34,0.3593813663804626,test,0.8545454545454545,0.045012762193966466,0.8484848484848485,0.047371752217409795,0.8444293478260869,0.04764040884954788 +flat_mae,patch,logistic,aabc_sex,35,0.3593813663804626,train,0.9886578449905482,0.004851972765467586,0.9883572497579012,0.004989313613317679,0.98776341627832,0.0052951774909159215 +flat_mae,patch,logistic,aabc_sex,35,0.3593813663804626,test,0.8909090909090909,0.039984856637614426,0.8879076086956521,0.0411721203533657,0.8879076086956521,0.04142525201979005 +flat_mae,patch,logistic,aabc_sex,36,0.3593813663804626,train,0.9905482041587902,0.00416979297215421,0.9903037190461352,0.004281427352691946,0.9900055687446877,0.00446769230371324 +flat_mae,patch,logistic,aabc_sex,36,0.3593813663804626,test,0.9272727272727272,0.035203512221471216,0.9260752688172043,0.035507057213833945,0.9313858695652174,0.033797160282823945 +flat_mae,patch,logistic,aabc_sex,37,0.046415888336127774,train,0.9565217391304348,0.009088968762648836,0.9555041124045041,0.009287027476799914,0.9563366452709634,0.009216275651039278 +flat_mae,patch,logistic,aabc_sex,37,0.046415888336127774,test,0.7818181818181819,0.05562399149424701,0.7727272727272727,0.05934268752189608,0.7697010869565217,0.05898269618302792 +flat_mae,patch,logistic,aabc_sex,38,0.046415888336127774,train,0.945179584120983,0.010194950077669178,0.9438299336914217,0.010428683963150666,0.9441000615492834,0.010437354691392947 +flat_mae,patch,logistic,aabc_sex,38,0.046415888336127774,test,0.8545454545454545,0.04630304775503397,0.84593837535014,0.05097263430179671,0.8383152173913043,0.05095856139016274 +flat_mae,patch,logistic,aabc_sex,39,0.046415888336127774,train,0.947069943289225,0.0094226514753718,0.9459242384415433,0.009605001120228414,0.9475585450921773,0.00954463897948555 +flat_mae,patch,logistic,aabc_sex,39,0.046415888336127774,test,0.8363636363636363,0.04612595045942274,0.8307692307692308,0.04830300068921306,0.8288043478260869,0.048668960193369214 +flat_mae,patch,logistic,aabc_sex,40,0.046415888336127774,train,0.9489603024574669,0.009194778504348328,0.9477037313678753,0.0094090031358967,0.9479762009437558,0.00943964886183727 +flat_mae,patch,logistic,aabc_sex,40,0.046415888336127774,test,0.8363636363636363,0.04630419005369733,0.8281846581048247,0.04982086786145176,0.8226902173913043,0.04992663686849045 +flat_mae,patch,logistic,aabc_sex,41,0.046415888336127774,train,0.947069943289225,0.010012355815935018,0.9458625730994152,0.010223035958752711,0.9469503795539143,0.01016827858412198 +flat_mae,patch,logistic,aabc_sex,41,0.046415888336127774,test,0.8909090909090909,0.040524886752229074,0.8879076086956521,0.04170419435567948,0.8879076086956521,0.04164180767130103 +flat_mae,patch,logistic,aabc_sex,42,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,42,2.782559402207126,test,0.7636363636363637,0.057367886314255025,0.7555555555555555,0.059738701059157934,0.7540760869565217,0.059416502803483685 +flat_mae,patch,logistic,aabc_sex,43,0.005994842503189409,train,0.9017013232514177,0.012845774905076907,0.8985663293902475,0.013344760355020798,0.8961795480524049,0.013694003519534323 +flat_mae,patch,logistic,aabc_sex,43,0.005994842503189409,test,0.8727272727272727,0.044383587168201656,0.8683760683760684,0.046290328368371866,0.8661684782608696,0.046607154102812666 +flat_mae,patch,logistic,aabc_sex,44,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,44,166.81005372000556,test,0.8545454545454545,0.047894981534586115,0.8521505376344086,0.048377449138828396,0.8566576086956521,0.04773397944128522 +flat_mae,patch,logistic,aabc_sex,45,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,45,2.782559402207126,test,0.9272727272727272,0.03444530938811518,0.9260752688172043,0.034810760335883546,0.9313858695652174,0.033219691220591895 +flat_mae,patch,logistic,aabc_sex,46,0.005994842503189409,train,0.9017013232514177,0.013631854837520342,0.8984254992319508,0.01421989547594844,0.8955713825141417,0.014616559241145974 +flat_mae,patch,logistic,aabc_sex,46,0.005994842503189409,test,0.8727272727272727,0.04528899306191276,0.8699763593380614,0.04617392943605174,0.8722826086956521,0.04600226583486681 +flat_mae,patch,logistic,aabc_sex,47,0.005994842503189409,train,0.8979206049149339,0.013061236096919485,0.8946650343667955,0.013534450164388548,0.8923034086579326,0.013754824789637863 +flat_mae,patch,logistic,aabc_sex,47,0.005994842503189409,test,0.8545454545454545,0.044238437346305226,0.8484848484848485,0.04713117545256556,0.8444293478260869,0.04788467976651367 +flat_mae,patch,logistic,aabc_sex,48,0.3593813663804626,train,0.9905482041587902,0.004503994048151411,0.9903037190461352,0.004623324744876472,0.9900055687446877,0.004778851360697248 +flat_mae,patch,logistic,aabc_sex,48,0.3593813663804626,test,0.9454545454545454,0.029973409703538883,0.9442755825734549,0.030459293879957234,0.9470108695652174,0.029432999141188104 +flat_mae,patch,logistic,aabc_sex,49,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,49,166.81005372000556,test,0.8181818181818182,0.04958446837601699,0.8151881720430108,0.050466875716340925,0.8192934782608696,0.05034578805722857 +flat_mae,patch,logistic,aabc_sex,50,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,50,2.782559402207126,test,0.8727272727272727,0.0444960170935566,0.8699763593380614,0.04531868706132281,0.8722826086956521,0.04488204493476197 +flat_mae,patch,logistic,aabc_sex,51,0.046415888336127774,train,0.943289224952741,0.009398278735884941,0.9419956140350878,0.00960443833941058,0.943074240159442,0.009658259220726542 +flat_mae,patch,logistic,aabc_sex,51,0.046415888336127774,test,0.8727272727272727,0.042056412331317766,0.8639095086603039,0.04767537067137091,0.8539402173913043,0.04816555804213136 +flat_mae,patch,logistic,aabc_sex,52,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,52,1291.5496650148827,test,0.8363636363636363,0.04972082058856227,0.8328267477203647,0.05093691291616957,0.8349184782608696,0.05129882457128467 +flat_mae,patch,logistic,aabc_sex,53,0.046415888336127774,train,0.9395085066162571,0.01072036267742937,0.938056206088993,0.010966033690414014,0.9385899352267065,0.01097844436861296 +flat_mae,patch,logistic,aabc_sex,53,0.046415888336127774,test,0.8727272727272727,0.0469079492461495,0.8699763593380614,0.04790591877316191,0.8722826086956521,0.0475857403176399 +flat_mae,patch,logistic,aabc_sex,54,0.005994842503189409,train,0.9054820415879017,0.012672500904995424,0.9024676244136995,0.013183746916340366,0.900055687446877,0.013588363239771752 +flat_mae,patch,logistic,aabc_sex,54,0.005994842503189409,test,0.8545454545454545,0.047584982699507145,0.8521505376344086,0.048061229422214816,0.8566576086956521,0.047089442909126124 +flat_mae,patch,logistic,aabc_sex,55,0.3593813663804626,train,0.9924385633270322,0.0038414777454459014,0.9922477212110554,0.00393856136886008,0.9922477212110554,0.003986471378171804 +flat_mae,patch,logistic,aabc_sex,55,0.3593813663804626,test,0.8545454545454545,0.04671179085582863,0.8505434782608696,0.04816033039336056,0.8505434782608696,0.04840516099472953 +flat_mae,patch,logistic,aabc_sex,56,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,56,2.782559402207126,test,0.8363636363636363,0.04941992103434929,0.8328267477203647,0.050604884105487934,0.8349184782608696,0.05064134931322486 +flat_mae,patch,logistic,aabc_sex,57,0.046415888336127774,train,0.947069943289225,0.009264765802640023,0.9455985191279309,0.009544547474912726,0.9445177174008617,0.009769639118219172 +flat_mae,patch,logistic,aabc_sex,57,0.046415888336127774,test,0.8727272727272727,0.04416436617533892,0.8699763593380614,0.04517045994302185,0.8722826086956521,0.04507075072142798 +flat_mae,patch,logistic,aabc_sex,58,0.005994842503189409,train,0.9111531190926276,0.011633205814288271,0.9083820662768032,0.012050872216621936,0.906173979307717,0.012310876181780867 +flat_mae,patch,logistic,aabc_sex,58,0.005994842503189409,test,0.7818181818181819,0.05541608204086904,0.7782258064516129,0.056038183621799256,0.7819293478260869,0.05582345230517754 +flat_mae,patch,logistic,aabc_sex,59,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,59,2.782559402207126,test,0.8181818181818182,0.05443842833273973,0.8151881720430108,0.055262156872824694,0.8192934782608696,0.054865759819114054 +flat_mae,patch,logistic,aabc_sex,60,0.3593813663804626,train,0.9924385633270322,0.003996952466305613,0.9922477212110554,0.004097743922817043,0.9922477212110554,0.004136604617279269 +flat_mae,patch,logistic,aabc_sex,60,0.3593813663804626,test,0.8545454545454545,0.047990026512058706,0.8484848484848485,0.05085297590945147,0.8444293478260869,0.051152358690698986 +flat_mae,patch,logistic,aabc_sex,61,0.046415888336127774,train,0.9376181474480151,0.010595709168505189,0.9360823383385143,0.010843887803433958,0.9363477827603388,0.01088974555267544 +flat_mae,patch,logistic,aabc_sex,61,0.046415888336127774,test,0.9272727272727272,0.03404762828356093,0.9260752688172043,0.03433792285114079,0.9313858695652174,0.03259288937000265 +flat_mae,patch,logistic,aabc_sex,62,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,62,21.54434690031882,test,0.8727272727272727,0.04349589512312607,0.8683760683760684,0.04527019211013552,0.8661684782608696,0.045494723367764625 +flat_mae,patch,logistic,aabc_sex,63,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,63,21.54434690031882,test,0.9454545454545454,0.03216486456363053,0.9435897435897436,0.03376846198953986,0.9408967391304348,0.035433770970321826 +flat_mae,patch,logistic,aabc_sex,64,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,64,2.782559402207126,test,0.8363636363636363,0.05080065074093516,0.8354935194416749,0.050627014365217646,0.8471467391304348,0.04786581575776505 +flat_mae,patch,logistic,aabc_sex,65,0.046415888336127774,train,0.9395085066162571,0.009964965913991064,0.938056206088993,0.010197836260036838,0.9385899352267065,0.010241570124307774 +flat_mae,patch,logistic,aabc_sex,65,0.046415888336127774,test,0.9090909090909091,0.03826477568013747,0.9045470322804582,0.041497043154271794,0.8974184782608696,0.043090696607070364 +flat_mae,patch,logistic,aabc_sex,66,0.046415888336127774,train,0.941398865784499,0.00956300250064867,0.9398830580860384,0.009800268427240338,0.939615756616548,0.009857801332409868 +flat_mae,patch,logistic,aabc_sex,66,0.046415888336127774,test,0.8545454545454545,0.04830233983005222,0.8484848484848485,0.05096910285469237,0.8444293478260869,0.05120697113273936 +flat_mae,patch,logistic,aabc_sex,67,0.046415888336127774,train,0.9395085066162571,0.010177561744790764,0.9379053320421398,0.0104536877003987,0.9373736041501802,0.010621197006081391 +flat_mae,patch,logistic,aabc_sex,67,0.046415888336127774,test,0.9272727272727272,0.03524114800055036,0.9252717391304348,0.03626660727308106,0.9252717391304348,0.03661671772885658 +flat_mae,patch,logistic,aabc_sex,68,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,68,2.782559402207126,test,0.8727272727272727,0.04442356607860849,0.8683760683760684,0.04641913087996633,0.8661684782608696,0.04692960516721568 +flat_mae,patch,logistic,aabc_sex,69,0.3593813663804626,train,0.9924385633270322,0.0038095141569214354,0.9922477212110554,0.0039057722826572054,0.9922477212110554,0.003967993330971652 +flat_mae,patch,logistic,aabc_sex,69,0.3593813663804626,test,0.9272727272727272,0.033965952811797054,0.9242424242424243,0.036114391077133996,0.9191576086956521,0.03793128351403021 +flat_mae,patch,logistic,aabc_sex,70,0.3593813663804626,train,0.9924385633270322,0.0038441168316199147,0.9922477212110554,0.003943017702656649,0.9922477212110554,0.004042980311896647 +flat_mae,patch,logistic,aabc_sex,70,0.3593813663804626,test,0.8909090909090909,0.04241457725400977,0.8891129032258065,0.04277599697638235,0.8940217391304348,0.04150395710584286 +flat_mae,patch,logistic,aabc_sex,71,0.005994842503189409,train,0.8979206049149339,0.012788374081193384,0.8948077772867875,0.013223176501339797,0.8929115741961957,0.013414986944013368 +flat_mae,patch,logistic,aabc_sex,71,0.005994842503189409,test,0.8363636363636363,0.04694803175800249,0.8250265111346766,0.052760368989171,0.8165760869565217,0.051720316697096806 +flat_mae,patch,logistic,aabc_sex,72,0.046415888336127774,train,0.947069943289225,0.009468158992780428,0.9458625730994152,0.00966024686131656,0.9469503795539143,0.009551495647732519 +flat_mae,patch,logistic,aabc_sex,72,0.046415888336127774,test,0.9272727272727272,0.03477808752506134,0.9252717391304348,0.035761751674160236,0.9252717391304348,0.035871101411233335 +flat_mae,patch,logistic,aabc_sex,73,0.3593813663804626,train,0.9924385633270322,0.00383898205794161,0.9922477212110554,0.003935623474583572,0.9922477212110554,0.003967163010392109 +flat_mae,patch,logistic,aabc_sex,73,0.3593813663804626,test,0.9454545454545454,0.029458473363237318,0.9435897435897436,0.03071120913241949,0.9408967391304348,0.03194790103308846 +flat_mae,patch,logistic,aabc_sex,74,0.046415888336127774,train,0.941398865784499,0.009722604390348898,0.9398080346491953,0.010017119501813367,0.9390075910782849,0.010342318662473848 +flat_mae,patch,logistic,aabc_sex,74,0.046415888336127774,test,0.8909090909090909,0.043400742652746356,0.8879076086956521,0.044767749053928864,0.8879076086956521,0.045200349914890275 +flat_mae,patch,logistic,aabc_sex,75,0.046415888336127774,train,0.943289224952741,0.010100534162973402,0.9418579090829157,0.010363121458146243,0.9418579090829157,0.010501063748363324 +flat_mae,patch,logistic,aabc_sex,75,0.046415888336127774,test,0.8727272727272727,0.04190637340824667,0.8699763593380614,0.042936848384759765,0.8722826086956521,0.04293056580145528 +flat_mae,patch,logistic,aabc_sex,76,0.3593813663804626,train,0.9905482041587902,0.0042499041198930465,0.9903155058088658,0.0043510922008691295,0.9906137342829509,0.00426726781680735 +flat_mae,patch,logistic,aabc_sex,76,0.3593813663804626,test,0.8545454545454545,0.0483455742075171,0.8484848484848485,0.05083040314833605,0.8444293478260869,0.05087286479204501 +flat_mae,patch,logistic,aabc_sex,77,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,77,166.81005372000556,test,0.9090909090909091,0.034982404077497635,0.9027925061859314,0.03962064665448564,0.8913043478260869,0.04182678748396455 +flat_mae,patch,logistic,aabc_sex,78,0.3593813663804626,train,0.9924385633270322,0.0036788623583598715,0.9922477212110554,0.0037719618152966287,0.9922477212110554,0.003820964724803401 +flat_mae,patch,logistic,aabc_sex,78,0.3593813663804626,test,0.9636363636363636,0.024264002517759075,0.9626358695652174,0.024998518331265686,0.9626358695652174,0.025354361017826555 +flat_mae,patch,logistic,aabc_sex,79,0.005994842503189409,train,0.9017013232514177,0.012693373180227901,0.8988378934980876,0.01313119108581331,0.8973958791289312,0.013403614295945436 +flat_mae,patch,logistic,aabc_sex,79,0.005994842503189409,test,0.8181818181818182,0.050394424458752606,0.8074229691876751,0.055318435918774944,0.8009510869565217,0.05468635144284033 +flat_mae,patch,logistic,aabc_sex,80,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,80,2.782559402207126,test,0.8181818181818182,0.05327117590046402,0.8131793478260869,0.05485174040443029,0.8131793478260869,0.05478136824841633 +flat_mae,patch,logistic,aabc_sex,81,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,81,2.782559402207126,test,0.8727272727272727,0.04458448753687339,0.8683760683760684,0.046470438980687596,0.8661684782608696,0.046846430703867147 +flat_mae,patch,logistic,aabc_sex,82,0.046415888336127774,train,0.9395085066162571,0.009855295742792713,0.9379817696884434,0.010095834933064025,0.9379817696884434,0.010161203921605763 +flat_mae,patch,logistic,aabc_sex,82,0.046415888336127774,test,0.9090909090909091,0.03585380784784349,0.9045470322804582,0.0386490359895269,0.8974184782608696,0.040520596997251974 +flat_mae,patch,logistic,aabc_sex,83,0.3593813663804626,train,0.994328922495274,0.0032799970505886255,0.9941893034853195,0.003357632660075038,0.9944898736774231,0.0032072874970619863 +flat_mae,patch,logistic,aabc_sex,83,0.3593813663804626,test,0.9090909090909091,0.03528695120890774,0.9027925061859314,0.04008916802328859,0.8913043478260869,0.042190919923694024 +flat_mae,patch,logistic,aabc_sex,84,0.3593813663804626,train,0.9924385633270322,0.0040348236323985735,0.9922570257611241,0.004125580534493553,0.9928558867493186,0.0038484352828928687 +flat_mae,patch,logistic,aabc_sex,84,0.3593813663804626,test,0.8363636363636363,0.052153251604215044,0.8307692307692308,0.05424111572194943,0.8288043478260869,0.05441833963316422 +flat_mae,patch,logistic,aabc_sex,85,0.046415888336127774,train,0.943289224952741,0.010143674342475956,0.9417862487895061,0.010429117927508417,0.9412497435446525,0.010609164730297074 +flat_mae,patch,logistic,aabc_sex,85,0.046415888336127774,test,0.8909090909090909,0.041407612986299275,0.89,0.04143442805860488,0.9001358695652174,0.03900091922932733 +flat_mae,patch,logistic,aabc_sex,86,0.3593813663804626,train,0.9905482041587902,0.004347457862273572,0.9903155058088658,0.004452254836200476,0.9906137342829509,0.004396414880380765 +flat_mae,patch,logistic,aabc_sex,86,0.3593813663804626,test,0.9636363636363636,0.02700170793496143,0.9626358695652174,0.027813505623025696,0.9626358695652174,0.02815792584741834 +flat_mae,patch,logistic,aabc_sex,87,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,87,21.54434690031882,test,0.8727272727272727,0.045668948888298075,0.8699763593380614,0.046701079908440815,0.8722826086956521,0.04667032621729881 +flat_mae,patch,logistic,aabc_sex,88,0.3593813663804626,train,0.9924385633270322,0.003879634186670061,0.9922477212110554,0.0039771071069947565,0.9922477212110554,0.003979595199970474 +flat_mae,patch,logistic,aabc_sex,88,0.3593813663804626,test,0.8363636363636363,0.05124693097039368,0.8307692307692308,0.053453595453193975,0.8288043478260869,0.053863714691638254 +flat_mae,patch,logistic,aabc_sex,89,0.3593813663804626,train,0.9924385633270322,0.003981826992925858,0.9922477212110554,0.00408199302374027,0.9922477212110554,0.0041227062828921406 +flat_mae,patch,logistic,aabc_sex,89,0.3593813663804626,test,0.8545454545454545,0.04504212903970848,0.8428571428571429,0.05208508067669945,0.8322010869565217,0.051709750486285004 +flat_mae,patch,logistic,aabc_sex,90,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,90,21.54434690031882,test,0.8545454545454545,0.04696143661479723,0.8484848484848485,0.04991683667811407,0.8444293478260869,0.05027720686702003 +flat_mae,patch,logistic,aabc_sex,91,0.046415888336127774,train,0.941398865784499,0.010300316456049865,0.9398830580860384,0.010580094377439977,0.939615756616548,0.010781823848783489 +flat_mae,patch,logistic,aabc_sex,91,0.046415888336127774,test,0.8727272727272727,0.04528643823048248,0.8699763593380614,0.04617618848117934,0.8722826086956521,0.04584580545374488 +flat_mae,patch,logistic,aabc_sex,92,0.3593813663804626,train,0.9905482041587902,0.004097503393145564,0.9903037190461352,0.004205325718622862,0.9900055687446877,0.004342493420888673 +flat_mae,patch,logistic,aabc_sex,92,0.3593813663804626,test,0.9090909090909091,0.03795109510718645,0.9045470322804582,0.041034562652099586,0.8974184782608696,0.04273976405756128 +flat_mae,patch,logistic,aabc_sex,93,0.3593813663804626,train,0.9924385633270322,0.0036212572042901743,0.9922477212110554,0.0037124309588230076,0.9922477212110554,0.003749874892448866 +flat_mae,patch,logistic,aabc_sex,93,0.3593813663804626,test,0.8727272727272727,0.044218002391980454,0.8663658451926415,0.04785892113706935,0.8600543478260869,0.048799138508657566 +flat_mae,patch,logistic,aabc_sex,94,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,94,21.54434690031882,test,0.8909090909090909,0.040571846282167166,0.8863636363636364,0.04317435367131348,0.8817934782608696,0.04433560063666444 +flat_mae,patch,logistic,aabc_sex,95,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,95,2.782559402207126,test,0.8909090909090909,0.04102068013023562,0.8879076086956521,0.042270179159709355,0.8879076086956521,0.04241131861004691 +flat_mae,patch,logistic,aabc_sex,96,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,96,21.54434690031882,test,0.8727272727272727,0.04446188822049992,0.8683760683760684,0.04636188714456447,0.8661684782608696,0.046899038633777625 +flat_mae,patch,logistic,aabc_sex,97,0.3593813663804626,train,0.9905482041587902,0.004105746968818452,0.9903037190461352,0.0042139043764938214,0.9900055687446877,0.004374663998213056 +flat_mae,patch,logistic,aabc_sex,97,0.3593813663804626,test,0.9090909090909091,0.03792465780229913,0.905982905982906,0.03964596044519224,0.9035326086956521,0.04061963447917913 +flat_mae,patch,logistic,aabc_sex,98,0.046415888336127774,train,0.945179584120983,0.009795002149177418,0.9437615704675844,0.010066642434503014,0.9434918960110203,0.010233679853483979 +flat_mae,patch,logistic,aabc_sex,98,0.046415888336127774,test,0.8363636363636363,0.04846631615516997,0.8281846581048247,0.05214540495479185,0.8226902173913043,0.05256776333874748 +flat_mae,patch,logistic,aabc_sex,99,0.000774263682681127,train,0.8638941398865785,0.015650400345167265,0.8595533791557273,0.016215315310441743,0.857418154107682,0.016392130876872794 +flat_mae,patch,logistic,aabc_sex,99,0.000774263682681127,test,0.8727272727272727,0.04487589498175186,0.8639095086603039,0.050409903744866334,0.8539402173913043,0.05092666942087232 +flat_mae,patch,logistic,aabc_sex,100,0.046415888336127774,train,0.945179584120983,0.010653997190411877,0.9438299336914217,0.010929581503228704,0.9441000615492834,0.011089311565774708 +flat_mae,patch,logistic,aabc_sex,100,0.046415888336127774,test,0.8545454545454545,0.046635977315429844,0.8533333333333333,0.04659772233662555,0.8627717391304348,0.04460055140460567 diff --git a/data_scaling/n1600_2/eval_v2/aabc_sex__patch__logistic/log.txt b/data_scaling/n1600_2/eval_v2/aabc_sex__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..00d6d5a50524da9383ca5cf0fb9e436abe03b82a --- /dev/null +++ b/data_scaling/n1600_2/eval_v2/aabc_sex__patch__logistic/log.txt @@ -0,0 +1,245 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: 58906bf7243fb545e1349221e6921a1797e2e666, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-26 17:27:50 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n1600_2; eval v2 (aabc_sex patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n1600_2/eval_v2/aabc_sex__patch__logistic +model: flat_mae +representation: patch +dataset: aabc_sex +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/aabc_sex__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: aabc_sex (flat) +train (n=471): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 471 +}), + labels=[0 1], + counts=[269 202] +) + +validation (n=58): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 58 +}), + labels=[0 1], + counts=[36 22] +) + +test (n=55): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 55 +}), + labels=[0 1], + counts=[33 22] +) + +extracting features for all splits +extract (train) [ 0/236] eta: 0:19:25 time: 4.9398 data: 4.0362 max mem: 3205 +extract (train) [ 20/236] eta: 0:01:32 time: 0.2008 data: 0.0582 max mem: 3393 +extract (train) [ 40/236] eta: 0:01:00 time: 0.1859 data: 0.0532 max mem: 3393 +extract (train) [ 60/236] eta: 0:00:47 time: 0.1929 data: 0.0571 max mem: 3393 +extract (train) [ 80/236] eta: 0:00:38 time: 0.1790 data: 0.0499 max mem: 3393 +extract (train) [100/236] eta: 0:00:31 time: 0.1793 data: 0.0508 max mem: 3393 +extract (train) [120/236] eta: 0:00:26 time: 0.1933 data: 0.0568 max mem: 3393 +extract (train) [140/236] eta: 0:00:21 time: 0.1869 data: 0.0541 max mem: 3393 +extract (train) [160/236] eta: 0:00:16 time: 0.1855 data: 0.0533 max mem: 3393 +extract (train) [180/236] eta: 0:00:12 time: 0.1899 data: 0.0545 max mem: 3393 +extract (train) [200/236] eta: 0:00:07 time: 0.1783 data: 0.0506 max mem: 3393 +extract (train) [220/236] eta: 0:00:03 time: 0.1622 data: 0.0432 max mem: 3393 +extract (train) [235/236] eta: 0:00:00 time: 0.1514 data: 0.0391 max mem: 3393 +extract (train) Total time: 0:00:48 (0.2043 s / it) +extract (validation) [ 0/29] eta: 0:01:45 time: 3.6267 data: 3.5021 max mem: 3393 +extract (validation) [20/29] eta: 0:00:02 time: 0.1501 data: 0.0348 max mem: 3393 +extract (validation) [28/29] eta: 0:00:00 time: 0.1487 data: 0.0345 max mem: 3393 +extract (validation) Total time: 0:00:08 (0.2794 s / it) +extract (test) [ 0/28] eta: 0:01:44 time: 3.7486 data: 3.6073 max mem: 3393 +extract (test) [20/28] eta: 0:00:02 time: 0.1697 data: 0.0438 max mem: 3393 +extract (test) [27/28] eta: 0:00:00 time: 0.1459 data: 0.0336 max mem: 3393 +extract (test) Total time: 0:00:08 (0.3000 s / it) +feature extraction time: 0:01:04 +train features: (471, 768) +validation features: (58, 768) +test features: (55, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|--------:|:--------|--------:|----------:|--------:|----------:|--------:|-----------:| +| flat_mae | patch | logistic | aabc_sex | | 0.35938 | train | 0.99244 | 0.0035932 | 0.99226 | 0.0036792 | 0.99226 | 0.0037125 | +| flat_mae | patch | logistic | aabc_sex | | 0.35938 | test | 0.89091 | 0.041514 | 0.88791 | 0.04243 | 0.89394 | 0.04147 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 1, "C": 166.81005372000556, "split": "test", "acc": 0.8, "acc_std": 0.053450884228358524, "f1": 0.7975911676145868, "f1_std": 0.05380342899941615, "bacc": 0.8036684782608696, "bacc_std": 0.05328333909003386} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 2, "C": 2.782559402207126, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.044801770921314585, "f1": 0.8683760683760684, "f1_std": 0.04704920131178934, "bacc": 0.8661684782608696, "bacc_std": 0.047810608058988624} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 3, "C": 0.3593813663804626, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.049868269445817535, "f1": 0.8281846581048247, "f1_std": 0.053673058580491324, "bacc": 0.8226902173913043, "bacc_std": 0.05371832225504935} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 4, "C": 0.3593813663804626, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.049443100279910175, "f1": 0.8354935194416749, "f1_std": 0.04932763025388611, "bacc": 0.8471467391304348, "bacc_std": 0.04665864460056379} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 5, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04955138913581792, "f1": 0.8307692307692308, "f1_std": 0.051788466151093455, "bacc": 0.8288043478260869, "bacc_std": 0.0519619909651812} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 6, "C": 2.782559402207126, "split": "test", "acc": 0.9454545454545454, "acc_std": 0.032597297459343054, "f1": 0.9435897435897436, "f1_std": 0.033934850468303086, "bacc": 0.9408967391304348, "bacc_std": 0.03521115110421031} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 7, "C": 0.3593813663804626, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04129961479019122, "f1": 0.8863636363636364, "f1_std": 0.04406140807088731, "bacc": 0.8817934782608696, "bacc_std": 0.045193321940994576} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 8, "C": 0.046415888336127774, "split": "test", "acc": 0.8, "acc_std": 0.05186805893023259, "f1": 0.795677136102668, "f1_std": 0.052983078744054085, "bacc": 0.7975543478260869, "bacc_std": 0.0531022986078622} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 9, "C": 21.54434690031882, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04117185912089803, "f1": 0.8863636363636364, "f1_std": 0.043619145845567094, "bacc": 0.8817934782608696, "bacc_std": 0.04459627950072285} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 10, "C": 0.3593813663804626, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04484779033041271, "f1": 0.8683760683760684, "f1_std": 0.0466842940400422, "bacc": 0.8661684782608696, "bacc_std": 0.04687993867729493} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 11, "C": 0.046415888336127774, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.03636952679564093, "f1": 0.9252717391304348, "f1_std": 0.03746778687156803, "bacc": 0.9252717391304348, "bacc_std": 0.03765877233902594} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 12, "C": 0.046415888336127774, "split": "test", "acc": 0.8, "acc_std": 0.05311793832502184, "f1": 0.790003471017008, "f1_std": 0.057460511757611854, "bacc": 0.7853260869565217, "bacc_std": 0.05701289687081716} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 13, "C": 2.782559402207126, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.042962309062712166, "f1": 0.8891129032258065, "f1_std": 0.04338884389912486, "bacc": 0.8940217391304348, "bacc_std": 0.042343867580525794} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 14, "C": 0.046415888336127774, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.040123363487133794, "f1": 0.9071259709557582, "f1_std": 0.04098591837723501, "bacc": 0.9096467391304348, "bacc_std": 0.04061030841207201} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 15, "C": 21.54434690031882, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.05345771171822414, "f1": 0.76890756302521, "f1_std": 0.05847831469095084, "bacc": 0.7635869565217391, "bacc_std": 0.05744728200713319} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 16, "C": 21.54434690031882, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.047980381665774466, "f1": 0.8663658451926415, "f1_std": 0.052201839356174874, "bacc": 0.8600543478260869, "bacc_std": 0.053256339560986896} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 17, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.03974969618886964, "f1": 0.884453781512605, "f1_std": 0.044023137653841256, "bacc": 0.8756793478260869, "bacc_std": 0.04503130792582813} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 18, "C": 21.54434690031882, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03833382728152235, "f1": 0.9071259709557582, "f1_std": 0.03911589308652651, "bacc": 0.9096467391304348, "bacc_std": 0.038654286596477364} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 19, "C": 21.54434690031882, "split": "test", "acc": 0.8, "acc_std": 0.05046679294429796, "f1": 0.7931623931623932, "f1_std": 0.052656448579322994, "bacc": 0.7914402173913043, "bacc_std": 0.05252342306217075} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 20, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04552127465571445, "f1": 0.8711943793911007, "f1_std": 0.04578711187544698, "bacc": 0.8783967391304348, "bacc_std": 0.04439328535357895} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 21, "C": 0.046415888336127774, "split": "test", "acc": 0.8, "acc_std": 0.049832459798677155, "f1": 0.7861435136090491, "f1_std": 0.055746276005439875, "bacc": 0.7792119565217391, "bacc_std": 0.054371646597021796} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 22, "C": 21.54434690031882, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.03368841181933472, "f1": 0.9260752688172043, "f1_std": 0.03402031760445941, "bacc": 0.9313858695652174, "bacc_std": 0.032574051201196934} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 23, "C": 0.005994842503189409, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04682601523169933, "f1": 0.8699763593380614, "f1_std": 0.047817835710819635, "bacc": 0.8722826086956521, "bacc_std": 0.04766723642691978} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 24, "C": 0.3593813663804626, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04940985949055138, "f1": 0.8328267477203647, "f1_std": 0.05046526822405365, "bacc": 0.8349184782608696, "bacc_std": 0.050403559162165976} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 25, "C": 0.3593813663804626, "split": "test", "acc": 0.9454545454545454, "acc_std": 0.030435212316662606, "f1": 0.9442755825734549, "f1_std": 0.03092647824872064, "bacc": 0.9470108695652174, "bacc_std": 0.029690384367641234} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 26, "C": 2.782559402207126, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.04847866021114616, "f1": 0.8106060606060606, "f1_std": 0.051597052806010305, "bacc": 0.8070652173913043, "bacc_std": 0.05156378485539002} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 27, "C": 0.3593813663804626, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.042876335726006895, "f1": 0.8683760683760684, "f1_std": 0.044835704108910086, "bacc": 0.8661684782608696, "bacc_std": 0.045187891208528626} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 28, "C": 0.3593813663804626, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04564149192482608, "f1": 0.8250265111346766, "f1_std": 0.05200327821794475, "bacc": 0.8165760869565217, "bacc_std": 0.05159495698979989} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 29, "C": 2.782559402207126, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04921163606269292, "f1": 0.8307692307692308, "f1_std": 0.051582795908121946, "bacc": 0.8288043478260869, "bacc_std": 0.05171295257956842} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 30, "C": 0.3593813663804626, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04567497099294767, "f1": 0.8711943793911007, "f1_std": 0.04603273823889641, "bacc": 0.8783967391304348, "bacc_std": 0.0448009871161628} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 31, "C": 0.005994842503189409, "split": "test", "acc": 0.8, "acc_std": 0.051004209840831774, "f1": 0.7931623931623932, "f1_std": 0.053088832890884584, "bacc": 0.7914402173913043, "bacc_std": 0.05292184989758564} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 32, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.043579902866852475, "f1": 0.8663658451926415, "f1_std": 0.04710202620608417, "bacc": 0.8600543478260869, "bacc_std": 0.04795752350494955} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 33, "C": 0.3593813663804626, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03950496987251224, "f1": 0.905982905982906, "f1_std": 0.041324773334540425, "bacc": 0.9035326086956521, "bacc_std": 0.0420820692146386} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 34, "C": 0.3593813663804626, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.045012762193966466, "f1": 0.8484848484848485, "f1_std": 0.047371752217409795, "bacc": 0.8444293478260869, "bacc_std": 0.04764040884954788} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 35, "C": 0.3593813663804626, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.039984856637614426, "f1": 0.8879076086956521, "f1_std": 0.0411721203533657, "bacc": 0.8879076086956521, "bacc_std": 0.04142525201979005} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 36, "C": 0.3593813663804626, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.035203512221471216, "f1": 0.9260752688172043, "f1_std": 0.035507057213833945, "bacc": 0.9313858695652174, "bacc_std": 0.033797160282823945} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 37, "C": 0.046415888336127774, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.05562399149424701, "f1": 0.7727272727272727, "f1_std": 0.05934268752189608, "bacc": 0.7697010869565217, "bacc_std": 0.05898269618302792} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 38, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04630304775503397, "f1": 0.84593837535014, "f1_std": 0.05097263430179671, "bacc": 0.8383152173913043, "bacc_std": 0.05095856139016274} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 39, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04612595045942274, "f1": 0.8307692307692308, "f1_std": 0.04830300068921306, "bacc": 0.8288043478260869, "bacc_std": 0.048668960193369214} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 40, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04630419005369733, "f1": 0.8281846581048247, "f1_std": 0.04982086786145176, "bacc": 0.8226902173913043, "bacc_std": 0.04992663686849045} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 41, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.040524886752229074, "f1": 0.8879076086956521, "f1_std": 0.04170419435567948, "bacc": 0.8879076086956521, "bacc_std": 0.04164180767130103} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 42, "C": 2.782559402207126, "split": "test", "acc": 0.7636363636363637, "acc_std": 0.057367886314255025, "f1": 0.7555555555555555, "f1_std": 0.059738701059157934, "bacc": 0.7540760869565217, "bacc_std": 0.059416502803483685} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 43, "C": 0.005994842503189409, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.044383587168201656, "f1": 0.8683760683760684, "f1_std": 0.046290328368371866, "bacc": 0.8661684782608696, "bacc_std": 0.046607154102812666} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 44, "C": 166.81005372000556, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.047894981534586115, "f1": 0.8521505376344086, "f1_std": 0.048377449138828396, "bacc": 0.8566576086956521, "bacc_std": 0.04773397944128522} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 45, "C": 2.782559402207126, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.03444530938811518, "f1": 0.9260752688172043, "f1_std": 0.034810760335883546, "bacc": 0.9313858695652174, "bacc_std": 0.033219691220591895} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 46, "C": 0.005994842503189409, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04528899306191276, "f1": 0.8699763593380614, "f1_std": 0.04617392943605174, "bacc": 0.8722826086956521, "bacc_std": 0.04600226583486681} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 47, "C": 0.005994842503189409, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.044238437346305226, "f1": 0.8484848484848485, "f1_std": 0.04713117545256556, "bacc": 0.8444293478260869, "bacc_std": 0.04788467976651367} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 48, "C": 0.3593813663804626, "split": "test", "acc": 0.9454545454545454, "acc_std": 0.029973409703538883, "f1": 0.9442755825734549, "f1_std": 0.030459293879957234, "bacc": 0.9470108695652174, "bacc_std": 0.029432999141188104} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 49, "C": 166.81005372000556, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.04958446837601699, "f1": 0.8151881720430108, "f1_std": 0.050466875716340925, "bacc": 0.8192934782608696, "bacc_std": 0.05034578805722857} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 50, "C": 2.782559402207126, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.0444960170935566, "f1": 0.8699763593380614, "f1_std": 0.04531868706132281, "bacc": 0.8722826086956521, "bacc_std": 0.04488204493476197} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 51, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.042056412331317766, "f1": 0.8639095086603039, "f1_std": 0.04767537067137091, "bacc": 0.8539402173913043, "bacc_std": 0.04816555804213136} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 52, "C": 1291.5496650148827, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04972082058856227, "f1": 0.8328267477203647, "f1_std": 0.05093691291616957, "bacc": 0.8349184782608696, "bacc_std": 0.05129882457128467} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 53, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.0469079492461495, "f1": 0.8699763593380614, "f1_std": 0.04790591877316191, "bacc": 0.8722826086956521, "bacc_std": 0.0475857403176399} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 54, "C": 0.005994842503189409, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.047584982699507145, "f1": 0.8521505376344086, "f1_std": 0.048061229422214816, "bacc": 0.8566576086956521, "bacc_std": 0.047089442909126124} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 55, "C": 0.3593813663804626, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04671179085582863, "f1": 0.8505434782608696, "f1_std": 0.04816033039336056, "bacc": 0.8505434782608696, "bacc_std": 0.04840516099472953} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 56, "C": 2.782559402207126, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04941992103434929, "f1": 0.8328267477203647, "f1_std": 0.050604884105487934, "bacc": 0.8349184782608696, "bacc_std": 0.05064134931322486} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 57, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04416436617533892, "f1": 0.8699763593380614, "f1_std": 0.04517045994302185, "bacc": 0.8722826086956521, "bacc_std": 0.04507075072142798} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 58, "C": 0.005994842503189409, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.05541608204086904, "f1": 0.7782258064516129, "f1_std": 0.056038183621799256, "bacc": 0.7819293478260869, "bacc_std": 0.05582345230517754} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 59, "C": 2.782559402207126, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.05443842833273973, "f1": 0.8151881720430108, "f1_std": 0.055262156872824694, "bacc": 0.8192934782608696, "bacc_std": 0.054865759819114054} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 60, "C": 0.3593813663804626, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.047990026512058706, "f1": 0.8484848484848485, "f1_std": 0.05085297590945147, "bacc": 0.8444293478260869, "bacc_std": 0.051152358690698986} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 61, "C": 0.046415888336127774, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.03404762828356093, "f1": 0.9260752688172043, "f1_std": 0.03433792285114079, "bacc": 0.9313858695652174, "bacc_std": 0.03259288937000265} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 62, "C": 21.54434690031882, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04349589512312607, "f1": 0.8683760683760684, "f1_std": 0.04527019211013552, "bacc": 0.8661684782608696, "bacc_std": 0.045494723367764625} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 63, "C": 21.54434690031882, "split": "test", "acc": 0.9454545454545454, "acc_std": 0.03216486456363053, "f1": 0.9435897435897436, "f1_std": 0.03376846198953986, "bacc": 0.9408967391304348, "bacc_std": 0.035433770970321826} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 64, "C": 2.782559402207126, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.05080065074093516, "f1": 0.8354935194416749, "f1_std": 0.050627014365217646, "bacc": 0.8471467391304348, "bacc_std": 0.04786581575776505} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 65, "C": 0.046415888336127774, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03826477568013747, "f1": 0.9045470322804582, "f1_std": 0.041497043154271794, "bacc": 0.8974184782608696, "bacc_std": 0.043090696607070364} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 66, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04830233983005222, "f1": 0.8484848484848485, "f1_std": 0.05096910285469237, "bacc": 0.8444293478260869, "bacc_std": 0.05120697113273936} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 67, "C": 0.046415888336127774, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.03524114800055036, "f1": 0.9252717391304348, "f1_std": 0.03626660727308106, "bacc": 0.9252717391304348, "bacc_std": 0.03661671772885658} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 68, "C": 2.782559402207126, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04442356607860849, "f1": 0.8683760683760684, "f1_std": 0.04641913087996633, "bacc": 0.8661684782608696, "bacc_std": 0.04692960516721568} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 69, "C": 0.3593813663804626, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.033965952811797054, "f1": 0.9242424242424243, "f1_std": 0.036114391077133996, "bacc": 0.9191576086956521, "bacc_std": 0.03793128351403021} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 70, "C": 0.3593813663804626, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04241457725400977, "f1": 0.8891129032258065, "f1_std": 0.04277599697638235, "bacc": 0.8940217391304348, "bacc_std": 0.04150395710584286} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 71, "C": 0.005994842503189409, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04694803175800249, "f1": 0.8250265111346766, "f1_std": 0.052760368989171, "bacc": 0.8165760869565217, "bacc_std": 0.051720316697096806} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 72, "C": 0.046415888336127774, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.03477808752506134, "f1": 0.9252717391304348, "f1_std": 0.035761751674160236, "bacc": 0.9252717391304348, "bacc_std": 0.035871101411233335} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 73, "C": 0.3593813663804626, "split": "test", "acc": 0.9454545454545454, "acc_std": 0.029458473363237318, "f1": 0.9435897435897436, "f1_std": 0.03071120913241949, "bacc": 0.9408967391304348, "bacc_std": 0.03194790103308846} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 74, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.043400742652746356, "f1": 0.8879076086956521, "f1_std": 0.044767749053928864, "bacc": 0.8879076086956521, "bacc_std": 0.045200349914890275} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 75, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04190637340824667, "f1": 0.8699763593380614, "f1_std": 0.042936848384759765, "bacc": 0.8722826086956521, "bacc_std": 0.04293056580145528} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 76, "C": 0.3593813663804626, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.0483455742075171, "f1": 0.8484848484848485, "f1_std": 0.05083040314833605, "bacc": 0.8444293478260869, "bacc_std": 0.05087286479204501} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 77, "C": 166.81005372000556, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.034982404077497635, "f1": 0.9027925061859314, "f1_std": 0.03962064665448564, "bacc": 0.8913043478260869, "bacc_std": 0.04182678748396455} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 78, "C": 0.3593813663804626, "split": "test", "acc": 0.9636363636363636, "acc_std": 0.024264002517759075, "f1": 0.9626358695652174, "f1_std": 0.024998518331265686, "bacc": 0.9626358695652174, "bacc_std": 0.025354361017826555} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 79, "C": 0.005994842503189409, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.050394424458752606, "f1": 0.8074229691876751, "f1_std": 0.055318435918774944, "bacc": 0.8009510869565217, "bacc_std": 0.05468635144284033} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 80, "C": 2.782559402207126, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.05327117590046402, "f1": 0.8131793478260869, "f1_std": 0.05485174040443029, "bacc": 0.8131793478260869, "bacc_std": 0.05478136824841633} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 81, "C": 2.782559402207126, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04458448753687339, "f1": 0.8683760683760684, "f1_std": 0.046470438980687596, "bacc": 0.8661684782608696, "bacc_std": 0.046846430703867147} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 82, "C": 0.046415888336127774, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03585380784784349, "f1": 0.9045470322804582, "f1_std": 0.0386490359895269, "bacc": 0.8974184782608696, "bacc_std": 0.040520596997251974} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 83, "C": 0.3593813663804626, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03528695120890774, "f1": 0.9027925061859314, "f1_std": 0.04008916802328859, "bacc": 0.8913043478260869, "bacc_std": 0.042190919923694024} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 84, "C": 0.3593813663804626, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.052153251604215044, "f1": 0.8307692307692308, "f1_std": 0.05424111572194943, "bacc": 0.8288043478260869, "bacc_std": 0.05441833963316422} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 85, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.041407612986299275, "f1": 0.89, "f1_std": 0.04143442805860488, "bacc": 0.9001358695652174, "bacc_std": 0.03900091922932733} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 86, "C": 0.3593813663804626, "split": "test", "acc": 0.9636363636363636, "acc_std": 0.02700170793496143, "f1": 0.9626358695652174, "f1_std": 0.027813505623025696, "bacc": 0.9626358695652174, "bacc_std": 0.02815792584741834} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 87, "C": 21.54434690031882, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.045668948888298075, "f1": 0.8699763593380614, "f1_std": 0.046701079908440815, "bacc": 0.8722826086956521, "bacc_std": 0.04667032621729881} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 88, "C": 0.3593813663804626, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.05124693097039368, "f1": 0.8307692307692308, "f1_std": 0.053453595453193975, "bacc": 0.8288043478260869, "bacc_std": 0.053863714691638254} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 89, "C": 0.3593813663804626, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04504212903970848, "f1": 0.8428571428571429, "f1_std": 0.05208508067669945, "bacc": 0.8322010869565217, "bacc_std": 0.051709750486285004} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 90, "C": 21.54434690031882, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04696143661479723, "f1": 0.8484848484848485, "f1_std": 0.04991683667811407, "bacc": 0.8444293478260869, "bacc_std": 0.05027720686702003} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 91, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04528643823048248, "f1": 0.8699763593380614, "f1_std": 0.04617618848117934, "bacc": 0.8722826086956521, "bacc_std": 0.04584580545374488} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 92, "C": 0.3593813663804626, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03795109510718645, "f1": 0.9045470322804582, "f1_std": 0.041034562652099586, "bacc": 0.8974184782608696, "bacc_std": 0.04273976405756128} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 93, "C": 0.3593813663804626, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.044218002391980454, "f1": 0.8663658451926415, "f1_std": 0.04785892113706935, "bacc": 0.8600543478260869, "bacc_std": 0.048799138508657566} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 94, "C": 21.54434690031882, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.040571846282167166, "f1": 0.8863636363636364, "f1_std": 0.04317435367131348, "bacc": 0.8817934782608696, "bacc_std": 0.04433560063666444} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 95, "C": 2.782559402207126, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04102068013023562, "f1": 0.8879076086956521, "f1_std": 0.042270179159709355, "bacc": 0.8879076086956521, "bacc_std": 0.04241131861004691} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 96, "C": 21.54434690031882, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04446188822049992, "f1": 0.8683760683760684, "f1_std": 0.04636188714456447, "bacc": 0.8661684782608696, "bacc_std": 0.046899038633777625} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 97, "C": 0.3593813663804626, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03792465780229913, "f1": 0.905982905982906, "f1_std": 0.03964596044519224, "bacc": 0.9035326086956521, "bacc_std": 0.04061963447917913} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 98, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04846631615516997, "f1": 0.8281846581048247, "f1_std": 0.05214540495479185, "bacc": 0.8226902173913043, "bacc_std": 0.05256776333874748} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 99, "C": 0.000774263682681127, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04487589498175186, "f1": 0.8639095086603039, "f1_std": 0.050409903744866334, "bacc": 0.8539402173913043, "bacc_std": 0.05092666942087232} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 100, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.046635977315429844, "f1": 0.8533333333333333, "f1_std": 0.04659772233662555, "bacc": 0.8627717391304348, "bacc_std": 0.04460055140460567} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | aabc_sex | train | 100 | 22.709 | 132.33 | 0.9713 | 0.034206 | 0.97052 | 0.035208 | 0.97036 | 0.035657 | +| flat_mae | patch | logistic | aabc_sex | test | 100 | 22.709 | 132.33 | 0.86927 | 0.043604 | 0.86472 | 0.045369 | 0.86363 | 0.04606 | + + +done! total time: 0:04:48 diff --git a/data_scaling/n1600_2/eval_v2/abide_dx__patch__logistic/config.yaml b/data_scaling/n1600_2/eval_v2/abide_dx__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..161010b0da153d1ccd61c73ed9f395d5134fe441 --- /dev/null +++ b/data_scaling/n1600_2/eval_v2/abide_dx__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n1600_2; eval v2 (abide_dx patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n1600_2/eval_v2/abide_dx__patch__logistic +model: flat_mae +representation: patch +dataset: abide_dx +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/abide_dx__patch__logistic +remote_dir: null