# model settings
#
# Declarative configuration only: this module builds plain dicts and performs
# no other work. The `type` strings are presumably resolved to classes by the
# consuming framework's registry -- TODO confirm against the loader.

# Normalization-layer config; the same dict object is passed to both the
# backbone and the decode head below.
norm_cfg = dict(type='SyncBN', requires_grad=True)

model = dict(
    type='EncoderDecoder',
    backbone=dict(
        type='VIT_MLA',
        model_name='vit_large_patch16_384',
        # NOTE(review): img_size (768) differs from the 384 in model_name;
        # pos_embed_interp=True below is presumably what reconciles the two
        # -- confirm against the backbone implementation.
        img_size=768,
        patch_size=16,
        in_chans=3,
        embed_dim=1024,
        depth=24,
        num_heads=16,
        num_classes=19,
        drop_rate=0.1,
        norm_cfg=norm_cfg,
        pos_embed_interp=True,
        align_corners=False,
        mla_channels=256,
        # Four indices, all within 0..depth-1 (depth=24); presumably the
        # layers whose outputs are tapped -- verify in VIT_MLA.
        mla_index=(5, 11, 17, 23),
    ),
    decode_head=dict(
        type='VIT_MLAHead',
        # in_channels, img_size, mla_channels and num_classes repeat the
        # backbone's embed_dim, img_size, mla_channels and num_classes
        # values; keep them in sync when editing either section.
        in_channels=1024,
        channels=512,
        img_size=768,
        mla_channels=256,
        mlahead_channels=128,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss',
            use_sigmoid=False,
            loss_weight=1.0,
        ),
    ),
)

# model training and testing settings
train_cfg = dict()
test_cfg = dict(mode='whole')