# Model section of a registry-style (``dict(type=...)``) configuration:
# an ``EncoderDecoder`` segmentor built from a
# ``CloudAdapterDinoVisionTransformer`` backbone and a ``Mask2FormerHead``.

# Crop size as a (height, width)-style 2-tuple; nothing in this section reads it.
# NOTE(review): ``model.data_preprocessor.size`` below is (512, 512), not this
# value -- confirm the difference is intentional.
crop_size = (256, 256)

model = dict(
    backbone=dict(
        # Indices 0..23, i.e. one index for each of the ``depth=24`` blocks.
        adapter_index=list(range(24)),
        block_chunks=0,
        depth=24,
        embed_dim=1024,
        ffn_bias=True,
        ffn_layer='mlp',
        has_cat=False,
        img_size=512,
        init_values=1e-05,
        mlp_ratio=4,
        num_heads=16,
        cloud_adapter_config=dict(
            cnn_type='pmaa',
            context_dim=64,
            depth=4,
            # NOTE(review): spelled ``emd_dim`` (not ``embed_dim``); left as
            # is because it is the keyword handed to ``CloudAdapter``.
            emd_dim=1024,
            global_groups=1,
            hidden_channels=64,
            int_type='convnext',
            local_groups=1,
            num_layers=24,
            rank_dim=16,
            return_last_feature=False,
            return_multi_feats=False,
            type='CloudAdapter',
        ),
        patch_size=16,
        proj_bias=True,
        qkv_bias=True,
        type='CloudAdapterDinoVisionTransformer',
    ),
    data_preprocessor=dict(
        bgr_to_rgb=True,
        mean=[123.675, 116.28, 103.53],
        pad_val=0,
        seg_pad_val=255,
        size=(512, 512),
        std=[58.395, 57.12, 57.375],
        type='SegDataPreProcessor',
    ),
    decode_head=dict(
        align_corners=False,
        enforce_decoder_input_project=False,
        feat_channels=256,
        in_channels=[1024, 1024, 1024, 1024],
        loss_cls=dict(
            # One weight per class plus a trailing weight for the extra
            # "no object" label, so the length must be ``num_classes + 1``
            # (3 here, matching ``num_classes=2`` below).
            class_weight=[1.0, 1.0, 0.1],
            loss_weight=2.0,
            reduction='mean',
            type='mmdet.CrossEntropyLoss',
            use_sigmoid=False,
        ),
        loss_dice=dict(
            activate=True,
            eps=1.0,
            loss_weight=5.0,
            naive_dice=True,
            reduction='mean',
            type='mmdet.DiceLoss',
            use_sigmoid=True,
        ),
        loss_mask=dict(
            loss_weight=5.0,
            reduction='mean',
            type='mmdet.CrossEntropyLoss',
            use_sigmoid=True,
        ),
        num_classes=2,
        num_queries=100,
        num_transformer_feat_level=3,
        out_channels=256,
        pixel_decoder=dict(
            act_cfg=dict(type='ReLU'),
            encoder=dict(
                init_cfg=None,
                layer_cfg=dict(
                    ffn_cfg=dict(
                        act_cfg=dict(inplace=True, type='ReLU'),
                        embed_dims=256,
                        feedforward_channels=1024,
                        ffn_drop=0.0,
                        num_fcs=2,
                    ),
                    self_attn_cfg=dict(
                        batch_first=True,
                        dropout=0.0,
                        embed_dims=256,
                        im2col_step=64,
                        init_cfg=None,
                        norm_cfg=None,
                        num_heads=8,
                        num_levels=3,
                        num_points=4,
                    ),
                ),
                num_layers=6,
            ),
            init_cfg=None,
            norm_cfg=dict(num_groups=32, type='GN'),
            num_outs=3,
            positional_encoding=dict(normalize=True, num_feats=128),
            type='mmdet.MSDeformAttnPixelDecoder',
        ),
        positional_encoding=dict(normalize=True, num_feats=128),
        strides=[4, 8, 16, 32],
        train_cfg=dict(
            assigner=dict(
                # The three matching-cost weights (2.0 / 5.0 / 5.0) equal the
                # ``loss_weight`` values of loss_cls / loss_mask / loss_dice.
                match_costs=[
                    dict(type='mmdet.ClassificationCost', weight=2.0),
                    dict(
                        type='mmdet.CrossEntropyLossCost',
                        use_sigmoid=True,
                        weight=5.0,
                    ),
                    dict(
                        eps=1.0,
                        pred_act=True,
                        type='mmdet.DiceCost',
                        weight=5.0,
                    ),
                ],
                type='mmdet.HungarianAssigner',
            ),
            importance_sample_ratio=0.75,
            num_points=12544,
            oversample_ratio=3.0,
            sampler=dict(type='mmdet.MaskPseudoSampler'),
        ),
        transformer_decoder=dict(
            init_cfg=None,
            layer_cfg=dict(
                cross_attn_cfg=dict(
                    attn_drop=0.0,
                    batch_first=True,
                    dropout_layer=None,
                    embed_dims=256,
                    num_heads=8,
                    proj_drop=0.0,
                ),
                ffn_cfg=dict(
                    act_cfg=dict(inplace=True, type='ReLU'),
                    add_identity=True,
                    dropout_layer=None,
                    embed_dims=256,
                    feedforward_channels=2048,
                    ffn_drop=0.0,
                    num_fcs=2,
                ),
                self_attn_cfg=dict(
                    attn_drop=0.0,
                    batch_first=True,
                    dropout_layer=None,
                    embed_dims=256,
                    num_heads=8,
                    proj_drop=0.0,
                ),
            ),
            num_layers=9,
            return_intermediate=True,
        ),
        type='Mask2FormerHead',
    ),
    test_cfg=dict(mode='whole'),
    train_cfg=dict(),
    type='EncoderDecoder',
)