| MaskedAutoEncoderViT( | |
| (patch_embed): PatchEmbed( | |
| (proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16)) | |
| (norm): Identity() | |
| ) | |
| (blocks): ModuleList( | |
| (0): Block( | |
| (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) | |
| (attn): Attention( | |
| (qkv): Linear(in_features=768, out_features=2304, bias=True) | |
| (attn_drop): Dropout(p=0.0, inplace=False) | |
| (proj): Linear(in_features=768, out_features=768, bias=True) | |
| (proj_drop): Dropout(p=0.0, inplace=False) | |
| ) | |
| (ls1): Identity() | |
| (drop_path1): Identity() | |
| (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) | |
| (mlp): Mlp( | |
| (fc1): Linear(in_features=768, out_features=3072, bias=True) | |
| (act): GELU(approximate=none) | |
| (drop1): Dropout(p=0.0, inplace=False) | |
| (fc2): Linear(in_features=3072, out_features=768, bias=True) | |
| (drop2): Dropout(p=0.0, inplace=False) | |
| ) | |
| (ls2): Identity() | |
| (drop_path2): Identity() | |
| ) | |
| (1): Block( | |
| (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) | |
| (attn): Attention( | |
| (qkv): Linear(in_features=768, out_features=2304, bias=True) | |
| (attn_drop): Dropout(p=0.0, inplace=False) | |
| (proj): Linear(in_features=768, out_features=768, bias=True) | |
| (proj_drop): Dropout(p=0.0, inplace=False) | |
| ) | |
| (ls1): Identity() | |
| (drop_path1): Identity() | |
| (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) | |
| (mlp): Mlp( | |
| (fc1): Linear(in_features=768, out_features=3072, bias=True) | |
| (act): GELU(approximate=none) | |
| (drop1): Dropout(p=0.0, inplace=False) | |
| (fc2): Linear(in_features=3072, out_features=768, bias=True) | |
| (drop2): Dropout(p=0.0, inplace=False) | |
| ) | |
| (ls2): Identity() | |
| (drop_path2): Identity() | |
| ) | |
| (2): Block( | |
| (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) | |
| (attn): Attention( | |
| (qkv): Linear(in_features=768, out_features=2304, bias=True) | |
| (attn_drop): Dropout(p=0.0, inplace=False) | |
| (proj): Linear(in_features=768, out_features=768, bias=True) | |
| (proj_drop): Dropout(p=0.0, inplace=False) | |
| ) | |
| (ls1): Identity() | |
| (drop_path1): Identity() | |
| (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) | |
| (mlp): Mlp( | |
| (fc1): Linear(in_features=768, out_features=3072, bias=True) | |
| (act): GELU(approximate=none) | |
| (drop1): Dropout(p=0.0, inplace=False) | |
| (fc2): Linear(in_features=3072, out_features=768, bias=True) | |
| (drop2): Dropout(p=0.0, inplace=False) | |
| ) | |
| (ls2): Identity() | |
| (drop_path2): Identity() | |
| ) | |
| (3): Block( | |
| (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) | |
| (attn): Attention( | |
| (qkv): Linear(in_features=768, out_features=2304, bias=True) | |
| (attn_drop): Dropout(p=0.0, inplace=False) | |
| (proj): Linear(in_features=768, out_features=768, bias=True) | |
| (proj_drop): Dropout(p=0.0, inplace=False) | |
| ) | |
| (ls1): Identity() | |
| (drop_path1): Identity() | |
| (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) | |
| (mlp): Mlp( | |
| (fc1): Linear(in_features=768, out_features=3072, bias=True) | |
| (act): GELU(approximate=none) | |
| (drop1): Dropout(p=0.0, inplace=False) | |
| (fc2): Linear(in_features=3072, out_features=768, bias=True) | |
| (drop2): Dropout(p=0.0, inplace=False) | |
| ) | |
| (ls2): Identity() | |
| (drop_path2): Identity() | |
| ) | |
| (4): Block( | |
| (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) | |
| (attn): Attention( | |
| (qkv): Linear(in_features=768, out_features=2304, bias=True) | |
| (attn_drop): Dropout(p=0.0, inplace=False) | |
| (proj): Linear(in_features=768, out_features=768, bias=True) | |
| (proj_drop): Dropout(p=0.0, inplace=False) | |
| ) | |
| (ls1): Identity() | |
| (drop_path1): Identity() | |
| (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) | |
| (mlp): Mlp( | |
| (fc1): Linear(in_features=768, out_features=3072, bias=True) | |
| (act): GELU(approximate=none) | |
| (drop1): Dropout(p=0.0, inplace=False) | |
| (fc2): Linear(in_features=3072, out_features=768, bias=True) | |
| (drop2): Dropout(p=0.0, inplace=False) | |
| ) | |
| (ls2): Identity() | |
| (drop_path2): Identity() | |
| ) | |
| (5): Block( | |
| (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) | |
| (attn): Attention( | |
| (qkv): Linear(in_features=768, out_features=2304, bias=True) | |
| (attn_drop): Dropout(p=0.0, inplace=False) | |
| (proj): Linear(in_features=768, out_features=768, bias=True) | |
| (proj_drop): Dropout(p=0.0, inplace=False) | |
| ) | |
| (ls1): Identity() | |
| (drop_path1): Identity() | |
| (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) | |
| (mlp): Mlp( | |
| (fc1): Linear(in_features=768, out_features=3072, bias=True) | |
| (act): GELU(approximate=none) | |
| (drop1): Dropout(p=0.0, inplace=False) | |
| (fc2): Linear(in_features=3072, out_features=768, bias=True) | |
| (drop2): Dropout(p=0.0, inplace=False) | |
| ) | |
| (ls2): Identity() | |
| (drop_path2): Identity() | |
| ) | |
| (6): Block( | |
| (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) | |
| (attn): Attention( | |
| (qkv): Linear(in_features=768, out_features=2304, bias=True) | |
| (attn_drop): Dropout(p=0.0, inplace=False) | |
| (proj): Linear(in_features=768, out_features=768, bias=True) | |
| (proj_drop): Dropout(p=0.0, inplace=False) | |
| ) | |
| (ls1): Identity() | |
| (drop_path1): Identity() | |
| (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) | |
| (mlp): Mlp( | |
| (fc1): Linear(in_features=768, out_features=3072, bias=True) | |
| (act): GELU(approximate=none) | |
| (drop1): Dropout(p=0.0, inplace=False) | |
| (fc2): Linear(in_features=3072, out_features=768, bias=True) | |
| (drop2): Dropout(p=0.0, inplace=False) | |
| ) | |
| (ls2): Identity() | |
| (drop_path2): Identity() | |
| ) | |
| (7): Block( | |
| (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) | |
| (attn): Attention( | |
| (qkv): Linear(in_features=768, out_features=2304, bias=True) | |
| (attn_drop): Dropout(p=0.0, inplace=False) | |
| (proj): Linear(in_features=768, out_features=768, bias=True) | |
| (proj_drop): Dropout(p=0.0, inplace=False) | |
| ) | |
| (ls1): Identity() | |
| (drop_path1): Identity() | |
| (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) | |
| (mlp): Mlp( | |
| (fc1): Linear(in_features=768, out_features=3072, bias=True) | |
| (act): GELU(approximate=none) | |
| (drop1): Dropout(p=0.0, inplace=False) | |
| (fc2): Linear(in_features=3072, out_features=768, bias=True) | |
| (drop2): Dropout(p=0.0, inplace=False) | |
| ) | |
| (ls2): Identity() | |
| (drop_path2): Identity() | |
| ) | |
| (8): Block( | |
| (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) | |
| (attn): Attention( | |
| (qkv): Linear(in_features=768, out_features=2304, bias=True) | |
| (attn_drop): Dropout(p=0.0, inplace=False) | |
| (proj): Linear(in_features=768, out_features=768, bias=True) | |
| (proj_drop): Dropout(p=0.0, inplace=False) | |
| ) | |
| (ls1): Identity() | |
| (drop_path1): Identity() | |
| (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) | |
| (mlp): Mlp( | |
| (fc1): Linear(in_features=768, out_features=3072, bias=True) | |
| (act): GELU(approximate=none) | |
| (drop1): Dropout(p=0.0, inplace=False) | |
| (fc2): Linear(in_features=3072, out_features=768, bias=True) | |
| (drop2): Dropout(p=0.0, inplace=False) | |
| ) | |
| (ls2): Identity() | |
| (drop_path2): Identity() | |
| ) | |
| (9): Block( | |
| (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) | |
| (attn): Attention( | |
| (qkv): Linear(in_features=768, out_features=2304, bias=True) | |
| (attn_drop): Dropout(p=0.0, inplace=False) | |
| (proj): Linear(in_features=768, out_features=768, bias=True) | |
| (proj_drop): Dropout(p=0.0, inplace=False) | |
| ) | |
| (ls1): Identity() | |
| (drop_path1): Identity() | |
| (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) | |
| (mlp): Mlp( | |
| (fc1): Linear(in_features=768, out_features=3072, bias=True) | |
| (act): GELU(approximate=none) | |
| (drop1): Dropout(p=0.0, inplace=False) | |
| (fc2): Linear(in_features=3072, out_features=768, bias=True) | |
| (drop2): Dropout(p=0.0, inplace=False) | |
| ) | |
| (ls2): Identity() | |
| (drop_path2): Identity() | |
| ) | |
| (10): Block( | |
| (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) | |
| (attn): Attention( | |
| (qkv): Linear(in_features=768, out_features=2304, bias=True) | |
| (attn_drop): Dropout(p=0.0, inplace=False) | |
| (proj): Linear(in_features=768, out_features=768, bias=True) | |
| (proj_drop): Dropout(p=0.0, inplace=False) | |
| ) | |
| (ls1): Identity() | |
| (drop_path1): Identity() | |
| (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) | |
| (mlp): Mlp( | |
| (fc1): Linear(in_features=768, out_features=3072, bias=True) | |
| (act): GELU(approximate=none) | |
| (drop1): Dropout(p=0.0, inplace=False) | |
| (fc2): Linear(in_features=3072, out_features=768, bias=True) | |
| (drop2): Dropout(p=0.0, inplace=False) | |
| ) | |
| (ls2): Identity() | |
| (drop_path2): Identity() | |
| ) | |
| (11): Block( | |
| (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) | |
| (attn): Attention( | |
| (qkv): Linear(in_features=768, out_features=2304, bias=True) | |
| (attn_drop): Dropout(p=0.0, inplace=False) | |
| (proj): Linear(in_features=768, out_features=768, bias=True) | |
| (proj_drop): Dropout(p=0.0, inplace=False) | |
| ) | |
| (ls1): Identity() | |
| (drop_path1): Identity() | |
| (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) | |
| (mlp): Mlp( | |
| (fc1): Linear(in_features=768, out_features=3072, bias=True) | |
| (act): GELU(approximate=none) | |
| (drop1): Dropout(p=0.0, inplace=False) | |
| (fc2): Linear(in_features=3072, out_features=768, bias=True) | |
| (drop2): Dropout(p=0.0, inplace=False) | |
| ) | |
| (ls2): Identity() | |
| (drop_path2): Identity() | |
| ) | |
| ) | |
| (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) | |
| (decoder_embed): Linear(in_features=768, out_features=512, bias=True) | |
| (decoder_blocks): ModuleList( | |
| (0): Block( | |
| (norm1): LayerNorm((512,), eps=1e-06, elementwise_affine=True) | |
| (attn): Attention( | |
| (qkv): Linear(in_features=512, out_features=1536, bias=True) | |
| (attn_drop): Dropout(p=0.0, inplace=False) | |
| (proj): Linear(in_features=512, out_features=512, bias=True) | |
| (proj_drop): Dropout(p=0.0, inplace=False) | |
| ) | |
| (ls1): Identity() | |
| (drop_path1): Identity() | |
| (norm2): LayerNorm((512,), eps=1e-06, elementwise_affine=True) | |
| (mlp): Mlp( | |
| (fc1): Linear(in_features=512, out_features=2048, bias=True) | |
| (act): GELU(approximate=none) | |
| (drop1): Dropout(p=0.0, inplace=False) | |
| (fc2): Linear(in_features=2048, out_features=512, bias=True) | |
| (drop2): Dropout(p=0.0, inplace=False) | |
| ) | |
| (ls2): Identity() | |
| (drop_path2): Identity() | |
| ) | |
| ) | |
| (decoder_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True) | |
| (decoder_pred): Linear(in_features=512, out_features=768, bias=True) | |
| ) | |
| Namespace(batch_size=256, blr=0.0002, dataset='/home/dataset', decoder_depth=1, device='cuda', dist_backend='nccl', dist_url='env://', distributed=True, epochs=800, gpu=0, lr=0.0032, mask_ratio=0.75, min_lr=0.0, model_name='mae_vit_base_patch16', norm_pix_loss=True, rank=0, resume=None, save_dir='./run/ablation/decoder/depth-1', start_epoch=0, sync_bn=True, use_amp=True, warmup_epochs=40, weight_decay=0.05, workers=16, world_size=16) | |