# Model configuration: an 'EncoderDecoder' built from a 'VIT_MLA' backbone and
# a 'VIT_MLAHead' decode head, plus empty train settings and 'whole'-mode test
# settings.
# NOTE(review): the `type=` strings look like registry keys resolved by the
# framework that loads this file -- confirm against the loader.

# Normalization settings; the same dict object is passed to both the backbone
# and the decode head below.
norm_cfg = dict(type='SyncBN', requires_grad=True)

model = dict(
    type='EncoderDecoder',
    backbone=dict(
        type='VIT_MLA',
        model_name='vit_large_patch16_384',
        # NOTE(review): img_size (768) differs from the 384 in model_name;
        # presumably pos_embed_interp=True is what makes this work -- confirm.
        img_size=768,
        patch_size=16,
        in_chans=3,
        embed_dim=1024,
        depth=24,
        num_heads=16,
        num_classes=19,
        drop_rate=0.1,
        norm_cfg=norm_cfg,
        pos_embed_interp=True,
        align_corners=False,
        mla_channels=256,
        # Four indices, all below depth=24; presumably the transformer layers
        # whose outputs are aggregated -- confirm against VIT_MLA.
        mla_index=(5, 11, 17, 23),
    ),
    decode_head=dict(
        type='VIT_MLAHead',
        # in_channels equals the backbone's embed_dim (1024); img_size,
        # mla_channels and num_classes repeat the backbone's values.
        # NOTE(review): presumably these must stay in sync -- confirm.
        in_channels=1024,
        channels=512,
        img_size=768,
        mla_channels=256,
        mlahead_channels=128,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss',
            use_sigmoid=False,
            loss_weight=1.0,
        ),
    ),
)

# No training-specific options are set here.
train_cfg = dict()
# NOTE(review): 'whole' presumably selects whole-image inference -- confirm.
test_cfg = dict(mode='whole')