NUM_GPUS=1 MASTER_ADDR=ip-10-0-135-126 MASTER_PORT=16509 WORLD_SIZE=1 ------ ARGS ------- Namespace(found_model_name='HCPflat_large_gsrFalse_', epoch_checkpoint='epoch99.pth', model_suffix='beta', hcp_flat_path='/weka/proj-medarc/shared/HCP-Flat', batch_size=16, wandb_log=True, num_epochs=20, lr_scheduler_type='cycle', save_ckpt=False, seed=42, max_lr=3e-05, target='age', num_workers=15, weight_decay=0.001, global_pool=True) outdir /weka/proj-fmri/ckadirt/fMRI-foundation-model/src/checkpoints/HCPflat_large_gsrFalse_ Loaded config.yaml from ckpt folder /weka/proj-fmri/ckadirt/fMRI-foundation-model/src/checkpoints/HCPflat_large_gsrFalse_ __CONFIG__ base_lr = 0.001 batch_size = 32 ckpt_interval = 5 ckpt_saving = True cls_embed = True contrastive_loss_weight = 1.0 datasets_to_include = HCP decoder_embed_dim = 512 grad_accumulation_steps = 1 grad_clip = 1.0 gsr = False hcp_flat_path = /weka/proj-medarc/shared/HCP-Flat mask_ratio = 0.75 model_name = HCPflat_large_gsrFalse_ no_qkv_bias = False norm_pix_loss = False nsd_flat_path = /weka/proj-medarc/shared/NSD-Flat num_epochs = 100 num_frames = 16 num_samples_per_epoch = 200000 num_workers = 10 patch_size = 16 pct_masks_to_decode = 1 plotting = True pred_t_dim = 8 print_interval = 20 probe_base_lr = 0.0003 probe_batch_size = 8 probe_num_epochs = 30 probe_num_samples_per_epoch = 100000 resume_from_ckpt = True seed = 42 sep_pos_embed = True t_patch_size = 2 test_num_samples_per_epoch = 50000 test_set = False trunc_init = False use_contrastive_loss = False wandb_log = True WORLD_SIZE=1 PID of this process = 2074741 global_pool = True gsr = False Creating datasets Datasets ready img_size (144, 320) patch_size (16, 16) frames 16 t_patch_size 2 model initialized latest_checkpoint: epoch99.pth Loaded checkpoint epoch99.pth from /weka/proj-fmri/ckadirt/fMRI-foundation-model/src/checkpoints/HCPflat_large_gsrFalse_ Input dimension: 1024 total_steps 139140 wandb_config: {'model_name': 'HCPflat_large_gsrFalse__HCP_FT_age', 'batch_size': 16, 'weight_decay': 0.001, 'num_epochs': 20, 'seed': 42, 'lr_scheduler_type': 'cycle', 'save_ckpt': False, 'max_lr': 3e-05, 'target': 'age', 'num_workers': 15} wandb_id: HCPflat_large_gsrFalse__beta_age_HCPFT_185e68b7-ea11-4f13-b6c7-a9ecc17084b1 Step [100/6957] - Training Loss: 0.3892 - Training MSE: 8.0130 Step [200/6957] - Training Loss: 0.4409 - Training MSE: 7.4482 Step [300/6957] - Training Loss: 0.5592 - Training MSE: 7.3383 Step [400/6957] - Training Loss: 0.5652 - Training MSE: 7.1366 Step [500/6957] - Training Loss: 0.3701 - Training MSE: 7.0621 Step [600/6957] - Training Loss: 0.3159 - Training MSE: 6.9860 Step [700/6957] - Training Loss: 0.3431 - Training MSE: 6.9015 Step [800/6957] - Training Loss: 0.5229 - Training MSE: 6.8741 Step [900/6957] - Training Loss: 0.6388 - Training MSE: 6.8505 Step [1000/6957] - Training Loss: 0.5065 - Training MSE: 6.8173 Step [1100/6957] - Training Loss: 0.4181 - Training MSE: 6.8281 Step [1200/6957] - Training Loss: 0.3158 - Training MSE: 6.8081 Step [1300/6957] - Training Loss: 0.3294 - Training MSE: 6.8239 Step [1400/6957] - Training Loss: 0.4973 - Training MSE: 6.8025 Step [1500/6957] - Training Loss: 0.3820 - Training MSE: 6.7897 Step [1600/6957] - Training Loss: 0.3106 - Training MSE: 6.7547 Step [1700/6957] - Training Loss: 0.4086 - Training MSE: 6.7618 Step [1800/6957] - Training Loss: 0.5164 - Training MSE: 6.7494 Step [1900/6957] - Training Loss: 0.3825 - Training MSE: 6.7591 Step [2000/6957] - Training Loss: 0.7149 - Training MSE: 6.7480 Step [2100/6957] - Training Loss: 0.3802 - Training MSE: 6.7397 Step [2200/6957] - Training Loss: 0.3562 - Training MSE: 6.7415 Step [2300/6957] - Training Loss: 0.2990 - Training MSE: 6.7273 Step [2400/6957] - Training Loss: 0.6899 - Training MSE: 6.7225 Step [2500/6957] - Training Loss: 0.4890 - Training MSE: 6.7216 Step [2600/6957] - Training Loss: 0.3221 - Training MSE: 6.7234 Step [2700/6957] - Training Loss: 0.4387 - Training MSE: 6.7172 Step [2800/6957] - Training Loss: 0.5289 - Training MSE: 6.7050 Step [2900/6957] - Training Loss: 0.3853 - Training MSE: 6.6949 Step [3000/6957] - Training Loss: 0.5635 - Training MSE: 6.6938 Step [3100/6957] - Training Loss: 0.2955 - Training MSE: 6.6832 Step [3200/6957] - Training Loss: 0.4344 - Training MSE: 6.6789 Step [3300/6957] - Training Loss: 0.6001 - Training MSE: 6.6725 Step [3400/6957] - Training Loss: 0.5012 - Training MSE: 6.6781 Step [3500/6957] - Training Loss: 0.4927 - Training MSE: 6.6781 Step [3600/6957] - Training Loss: 0.4414 - Training MSE: 6.6745 Step [3700/6957] - Training Loss: 0.5066 - Training MSE: 6.6728 Step [3800/6957] - Training Loss: 0.2950 - Training MSE: 6.6748 Step [3900/6957] - Training Loss: 0.1609 - Training MSE: 6.6664 Step [4000/6957] - Training Loss: 0.5618 - Training MSE: 6.6716 Step [4100/6957] - Training Loss: 0.3337 - Training MSE: 6.6691 Step [4200/6957] - Training Loss: 0.2716 - Training MSE: 6.6694 Step [4300/6957] - Training Loss: 0.5301 - Training MSE: 6.6661 Step [4400/6957] - Training Loss: 0.4866 - Training MSE: 6.6675 Step [4500/6957] - Training Loss: 0.5047 - Training MSE: 6.6645 Step [4600/6957] - Training Loss: 0.6342 - Training MSE: 6.6686 Step [4700/6957] - Training Loss: 0.2574 - Training MSE: 6.6664 Step [4800/6957] - Training Loss: 0.3241 - Training MSE: 6.6717 Step [4900/6957] - Training Loss: 0.3252 - Training MSE: 6.6633 Step [5000/6957] - Training Loss: 0.3624 - Training MSE: 6.6600 Step [5100/6957] - Training Loss: 0.1873 - Training MSE: 6.6616 Step [5200/6957] - Training Loss: 0.2979 - Training MSE: 6.6636 Step [5300/6957] - Training Loss: 0.7098 - Training MSE: 6.6670 Step [5400/6957] - Training Loss: 0.4409 - Training MSE: 6.6670 Step [5500/6957] - Training Loss: 0.7333 - Training MSE: 6.6724 Step [5600/6957] - Training Loss: 0.4391 - Training MSE: 6.6729 Step [5700/6957] - Training Loss: 0.2962 - Training MSE: 6.6756 Step [5800/6957] - Training Loss: 0.2363 - Training MSE: 6.6706 Step [5900/6957] - Training Loss: 0.2784 - Training MSE: 6.6740 Step [6000/6957] - Training Loss: 0.2351 - Training MSE: 6.6702 Step [6100/6957] - Training Loss: 0.3995 - Training MSE: 6.6634 Step [6200/6957] - Training Loss: 0.3206 - Training MSE: 6.6642 Step [6300/6957] - Training Loss: 0.6845 - Training MSE: 6.6648 Step [6400/6957] - Training Loss: 0.2888 - Training MSE: 6.6643 Step [6500/6957] - Training Loss: 0.3775 - Training MSE: 6.6611 Step [6600/6957] - Training Loss: 0.4634 - Training MSE: 6.6647 Step [6700/6957] - Training Loss: 0.3760 - Training MSE: 6.6631 Step [6800/6957] - Training Loss: 0.4631 - Training MSE: 6.6617 Step [6900/6957] - Training Loss: 0.5500 - Training MSE: 6.6584 Epoch [1/20] - Training Loss: 0.4160, Training MSE: 6.6562 - Validation Loss: 0.3768, Validation MSE: 6.0284 Step [100/6957] - Training Loss: 0.3951 - Training MSE: 6.8452 Step [200/6957] - Training Loss: 0.3693 - Training MSE: 6.8970 Step [300/6957] - Training Loss: 0.2880 - Training MSE: 6.7960 Step [400/6957] - Training Loss: 0.3614 - Training MSE: 6.6778 Step [500/6957] - Training Loss: 0.3836 - Training MSE: 6.7002 Step [600/6957] - Training Loss: 0.2527 - Training MSE: 6.6670 Step [700/6957] - Training Loss: 0.5024 - Training MSE: 6.6715 Step [800/6957] - Training Loss: 0.2082 - Training MSE: 6.6020 Step [900/6957] - Training Loss: 0.3287 - Training MSE: 6.6174 Step [1000/6957] - Training Loss: 0.3772 - Training MSE: 6.6493 Step [1100/6957] - Training Loss: 0.3960 - Training MSE: 6.6507 Step [1200/6957] - Training Loss: 0.3269 - Training MSE: 6.6264 Step [1300/6957] - Training Loss: 0.2354 - Training MSE: 6.6205 Step [1400/6957] - Training Loss: 0.7361 - Training MSE: 6.6152 Step [1500/6957] - Training Loss: 0.4436 - Training MSE: 6.6176 Step [1600/6957] - Training Loss: 0.3962 - Training MSE: 6.6356 Step [1700/6957] - Training Loss: 0.3149 - Training MSE: 6.6642 Step [1800/6957] - Training Loss: 0.3905 - Training MSE: 6.6755 Step [1900/6957] - Training Loss: 0.4452 - Training MSE: 6.6547 Step [2000/6957] - Training Loss: 0.3680 - Training MSE: 6.6425 Step [2100/6957] - Training Loss: 0.1843 - Training MSE: 6.6410 Step [2200/6957] - Training Loss: 0.5044 - Training MSE: 6.6335 Step [2300/6957] - Training Loss: 0.3778 - Training MSE: 6.6386 Step [2400/6957] - Training Loss: 0.5021 - Training MSE: 6.6491 Step [2500/6957] - Training Loss: 0.4191 - Training MSE: 6.6336 Step [2600/6957] - Training Loss: 0.2548 - Training MSE: 6.6459 Step [2700/6957] - Training Loss: 0.2546 - Training MSE: 6.6442 Step [2800/6957] - Training Loss: 0.1904 - Training MSE: 6.6486 Step [2900/6957] - Training Loss: 0.2516 - Training MSE: 6.6311 Step [3000/6957] - Training Loss: 0.4126 - Training MSE: 6.6303 Step [3100/6957] - Training Loss: 0.4081 - Training MSE: 6.6321 Step [3200/6957] - Training Loss: 0.5542 - Training MSE: 6.6396 Step [3300/6957] - Training Loss: 0.3723 - Training MSE: 6.6398 Step [3400/6957] - Training Loss: 0.5006 - Training MSE: 6.6385 Step [3500/6957] - Training Loss: 0.3165 - Training MSE: 6.6386 Step [3600/6957] - Training Loss: 0.3776 - Training MSE: 6.6369 Step [3700/6957] - Training Loss: 0.1849 - Training MSE: 6.6355 Step [3800/6957] - Training Loss: 0.4176 - Training MSE: 6.6365 Step [3900/6957] - Training Loss: 0.3486 - Training MSE: 6.6399 Step [4000/6957] - Training Loss: 0.2917 - Training MSE: 6.6375 Step [4100/6957] - Training Loss: 0.2614 - Training MSE: 6.6338 Step [4200/6957] - Training Loss: 0.7503 - Training MSE: 6.6331 Step [4300/6957] - Training Loss: 0.5870 - Training MSE: 6.6363 Step [4400/6957] - Training Loss: 0.5433 - Training MSE: 6.6321 Step [4500/6957] - Training Loss: 0.4100 - Training MSE: 6.6228 Step [4600/6957] - Training Loss: 0.3226 - Training MSE: 6.6281 Step [4700/6957] - Training Loss: 0.3344 - Training MSE: 6.6312 Step [4800/6957] - Training Loss: 0.4366 - Training MSE: 6.6283 Step [4900/6957] - Training Loss: 0.4385 - Training MSE: 6.6264 Step [5000/6957] - Training Loss: 0.3093 - Training MSE: 6.6253 Step [5100/6957] - Training Loss: 0.3095 - Training MSE: 6.6257 Step [5200/6957] - Training Loss: 0.3786 - Training MSE: 6.6220 Step [5300/6957] - Training Loss: 0.5198 - Training MSE: 6.6179 Step [5400/6957] - Training Loss: 0.3992 - Training MSE: 6.6198 Step [5500/6957] - Training Loss: 0.1849 - Training MSE: 6.6086 Step [5600/6957] - Training Loss: 0.3177 - Training MSE: 6.6133 Step [5700/6957] - Training Loss: 0.3840 - Training MSE: 6.6121 Step [5800/6957] - Training Loss: 0.4490 - Training MSE: 6.6044 Step [5900/6957] - Training Loss: 0.5593 - Training MSE: 6.6077 Step [6000/6957] - Training Loss: 0.2385 - Training MSE: 6.6079 Step [6100/6957] - Training Loss: 0.2349 - Training MSE: 6.5971 Step [6200/6957] - Training Loss: 0.1838 - Training MSE: 6.5991 Step [6300/6957] - Training Loss: 0.5872 - Training MSE: 6.6040 Step [6400/6957] - Training Loss: 0.4445 - Training MSE: 6.5962 Step [6500/6957] - Training Loss: 0.3096 - Training MSE: 6.5968 Step [6600/6957] - Training Loss: 0.2932 - Training MSE: 6.5956 Step [6700/6957] - Training Loss: 0.3872 - Training MSE: 6.5955 Step [6800/6957] - Training Loss: 0.5766 - Training MSE: 6.5942 Step [6900/6957] - Training Loss: 0.2552 - Training MSE: 6.5962 Epoch [2/20] - Training Loss: 0.4122, Training MSE: 6.5952 - Validation Loss: 0.4057, Validation MSE: 6.4909