# Model section of a registry-style config (every component is a ``dict`` with
# a ``type`` key naming the class to build).  Presumably consumed by
# MMEngine / MMSegmentation; the ``mmdet.`` prefixes select components from
# another registry scope -- confirm against the training entry point.

# Spatial size (two ints) shared by the data preprocessor below.
crop_size = (512, 512)

model = dict(
    # ---------------------------------------------------------------- backbone
    backbone=dict(
        # Layer indices 0..23, i.e. one per transformer block given depth=24.
        # Presumably the blocks that receive an adapter -- verify in the
        # backbone implementation.
        adapter_index=[
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
            12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
        ],
        block_chunks=0,
        depth=24,
        embed_dim=1024,
        ffn_bias=True,
        ffn_layer='mlp',
        has_cat=False,
        img_size=512,
        init_values=1e-05,
        mlp_ratio=4,
        num_heads=16,
        cloud_adapter_config=dict(
            cnn_type='pmaa',
            context_dim=64,
            depth=4,
            # NOTE: the key really is ``emd_dim`` (not ``embed_dim``); it is
            # kept verbatim because it is a keyword of the adapter class.
            # Its value equals the backbone's embed_dim (1024).
            emd_dim=1024,
            global_groups=1,
            hidden_channels=64,
            int_type='convnext',
            local_groups=1,
            # Equals the backbone depth (24).
            num_layers=24,
            rank_dim=16,
            return_last_feature=False,
            return_multi_feats=False,
            type='CloudAdapter',
        ),
        patch_size=16,
        proj_bias=True,
        qkv_bias=True,
        type='CloudAdapterDinoVisionTransformer',
    ),
    # ------------------------------------------------------- data preprocessor
    data_preprocessor=dict(
        bgr_to_rgb=True,
        # Per-channel normalisation statistics; these values match the widely
        # used ImageNet RGB mean/std on a 0-255 scale.
        mean=[123.675, 116.28, 103.53],
        pad_val=0,
        seg_pad_val=255,
        # Same value as ``crop_size`` above, referenced instead of duplicated.
        size=crop_size,
        std=[58.395, 57.12, 57.375],
        type='SegDataPreProcessor',
    ),
    # -------------------------------------------------------------- decode head
    decode_head=dict(
        align_corners=False,
        enforce_decoder_input_project=False,
        feat_channels=256,
        # Four input feature maps, all with the backbone's 1024 channels.
        in_channels=[1024, 1024, 1024, 1024],
        loss_cls=dict(
            # Five weights for num_classes=4: presumably one per class plus a
            # trailing, down-weighted (0.1) "no object" entry -- confirm
            # against the Mask2Former head implementation.
            class_weight=[1.0, 1.0, 1.0, 1.0, 0.1],
            loss_weight=2.0,
            reduction='mean',
            type='mmdet.CrossEntropyLoss',
            use_sigmoid=False,
        ),
        loss_dice=dict(
            activate=True,
            eps=1.0,
            loss_weight=5.0,
            naive_dice=True,
            reduction='mean',
            type='mmdet.DiceLoss',
            use_sigmoid=True,
        ),
        loss_mask=dict(
            loss_weight=5.0,
            reduction='mean',
            type='mmdet.CrossEntropyLoss',
            use_sigmoid=True,
        ),
        num_classes=4,
        num_queries=100,
        num_transformer_feat_level=3,
        out_channels=256,
        pixel_decoder=dict(
            act_cfg=dict(type='ReLU'),
            encoder=dict(
                init_cfg=None,
                layer_cfg=dict(
                    ffn_cfg=dict(
                        act_cfg=dict(inplace=True, type='ReLU'),
                        embed_dims=256,
                        feedforward_channels=1024,
                        ffn_drop=0.0,
                        num_fcs=2,
                    ),
                    self_attn_cfg=dict(
                        batch_first=True,
                        dropout=0.0,
                        embed_dims=256,
                        im2col_step=64,
                        init_cfg=None,
                        norm_cfg=None,
                        num_heads=8,
                        # Equals num_outs / num_transformer_feat_level (3).
                        num_levels=3,
                        num_points=4,
                    ),
                ),
                num_layers=6,
            ),
            init_cfg=None,
            norm_cfg=dict(num_groups=32, type='GN'),
            num_outs=3,
            positional_encoding=dict(normalize=True, num_feats=128),
            type='mmdet.MSDeformAttnPixelDecoder',
        ),
        positional_encoding=dict(normalize=True, num_feats=128),
        # One stride per entry of ``in_channels``.
        strides=[4, 8, 16, 32],
        train_cfg=dict(
            assigner=dict(
                # Matching-cost weights (2.0 / 5.0 / 5.0) mirror the
                # loss_weight values of loss_cls / loss_mask / loss_dice.
                match_costs=[
                    dict(type='mmdet.ClassificationCost', weight=2.0),
                    dict(
                        type='mmdet.CrossEntropyLossCost',
                        use_sigmoid=True,
                        weight=5.0,
                    ),
                    dict(
                        eps=1.0,
                        pred_act=True,
                        type='mmdet.DiceCost',
                        weight=5.0,
                    ),
                ],
                type='mmdet.HungarianAssigner',
            ),
            importance_sample_ratio=0.75,
            num_points=12544,
            oversample_ratio=3.0,
            sampler=dict(type='mmdet.MaskPseudoSampler'),
        ),
        transformer_decoder=dict(
            init_cfg=None,
            layer_cfg=dict(
                cross_attn_cfg=dict(
                    attn_drop=0.0,
                    batch_first=True,
                    dropout_layer=None,
                    embed_dims=256,
                    num_heads=8,
                    proj_drop=0.0,
                ),
                ffn_cfg=dict(
                    act_cfg=dict(inplace=True, type='ReLU'),
                    add_identity=True,
                    dropout_layer=None,
                    embed_dims=256,
                    feedforward_channels=2048,
                    ffn_drop=0.0,
                    num_fcs=2,
                ),
                self_attn_cfg=dict(
                    attn_drop=0.0,
                    batch_first=True,
                    dropout_layer=None,
                    embed_dims=256,
                    num_heads=8,
                    proj_drop=0.0,
                ),
            ),
            num_layers=9,
            return_intermediate=True,
        ),
        type='Mask2FormerHead',
    ),
    # ------------------------------------------------------- segmentor wrapper
    test_cfg=dict(mode='whole'),
    train_cfg=dict(),
    type='EncoderDecoder',
)