| from transformers.configuration_utils import PretrainedConfig |
|
|
# Maps a supported backbone identifier to the channel width of its final
# feature stage (used below as ``vision_width``). Tiny/small Swin variants
# end at 768 channels, the base variant at 1024; the ``solider_*`` entries
# mirror the corresponding ``swin_*`` geometry.
BACKBONE_NAME2WIDTH = {
    "swin_tiny_patch4_window7_224": 768,
    "swin_small_patch4_window7_224": 768,
    "swin_base_patch4_window7_224": 1024,
    "solider_tiny": 768,
    "solider_small": 768,
    "solider_base": 1024,
}
|
|
|
|
class SOLIDERConfig(PretrainedConfig):
    """Configuration for a SOLIDER (Swin-Transformer-based) vision backbone.

    Stores the architectural hyper-parameters of a Swin-style backbone
    (patch embedding, window attention, per-stage depths/heads/strides)
    plus SOLIDER-specific settings such as ``semantic_weight``.

    Args:
        pretrain_img_size (int): Input image size used during pretraining.
        in_channels (int): Number of input image channels.
        embed_dims (int): Embedding dimension of the first stage.
        patch_size (int): Patch-embedding patch size.
        window_size (int): Local attention window size.
        mlp_ratio (int): Ratio of MLP hidden dim to embedding dim.
        depths (tuple[int, ...]): Number of blocks in each stage.
        num_heads (tuple[int, ...]): Attention heads per stage.
        strides (tuple[int, ...]): Downsampling stride per stage.
        out_indices (tuple[int, ...]): Stages whose outputs are returned.
        qkv_bias (bool): Whether QKV projections use a bias term.
        qk_scale (float | None): Override for the attention scale factor.
        patch_norm (bool): Whether to normalize after patch embedding.
        drop_rate (float): Dropout rate.
        attn_drop_rate (float): Attention dropout rate.
        drop_path_rate (float): Stochastic-depth rate.
        use_abs_pos_embed (bool): Whether to add absolute position embeddings.
        act_cfg (dict | None): Activation config; defaults to
            ``{"type": "GELU"}`` when ``None``.
        norm_cfg (dict | None): Normalization config; defaults to
            ``{"type": "LN"}`` when ``None``.
        with_cp (bool): Whether to use gradient checkpointing.
        pretrained (str | None): Optional path/ID of pretrained weights.
        convert_weights (bool): Whether weights need key conversion on load.
        frozen_stages (int): Highest stage index to freeze; -1 freezes none.
        init_cfg (dict | None): Optional weight-initialization config.
        semantic_weight (float): SOLIDER semantic-information weight.
        name (str): Backbone identifier; must be a key of
            ``BACKBONE_NAME2WIDTH``.

    Raises:
        ValueError: If ``name`` is not a known backbone identifier.
    """

    model_type = "swin_transformer"

    def __init__(
        self,
        pretrain_img_size=224,
        in_channels=3,
        embed_dims=96,
        patch_size=4,
        window_size=7,
        mlp_ratio=4,
        depths=(2, 2, 6, 2),
        num_heads=(3, 6, 12, 24),
        strides=(4, 2, 2, 2),
        out_indices=(0, 1, 2, 3),
        qkv_bias=True,
        qk_scale=None,
        patch_norm=True,
        drop_rate=0.0,
        attn_drop_rate=0.0,
        drop_path_rate=0.0,
        use_abs_pos_embed=False,
        act_cfg=None,
        norm_cfg=None,
        with_cp=False,
        pretrained=None,
        convert_weights=False,
        frozen_stages=-1,
        init_cfg=None,
        semantic_weight=0.5,
        name="solider_small",
        **kwargs,
    ):
        self.pretrain_img_size = pretrain_img_size
        self.in_channels = in_channels
        self.embed_dims = embed_dims
        self.patch_size = patch_size
        self.window_size = window_size
        self.mlp_ratio = mlp_ratio
        self.depths = depths
        self.num_heads = num_heads
        self.strides = strides
        self.out_indices = out_indices
        self.qkv_bias = qkv_bias
        self.qk_scale = qk_scale
        self.patch_norm = patch_norm
        self.drop_rate = drop_rate
        self.attn_drop_rate = attn_drop_rate
        self.drop_path_rate = drop_path_rate
        self.use_abs_pos_embed = use_abs_pos_embed
        # Mutable defaults (dicts) must not be shared across instances, so the
        # signature uses None sentinels and fresh dicts are created here.
        self.act_cfg = dict(type="GELU") if act_cfg is None else act_cfg
        self.norm_cfg = dict(type="LN") if norm_cfg is None else norm_cfg
        self.with_cp = with_cp
        self.pretrained = pretrained
        self.convert_weights = convert_weights
        self.frozen_stages = frozen_stages
        self.init_cfg = init_cfg
        self.semantic_weight = semantic_weight

        self.img_size = pretrain_img_size
        # Explicit validation instead of `assert`, which is stripped under -O.
        if name not in BACKBONE_NAME2WIDTH:
            raise ValueError(
                f"Unknown backbone name {name!r}; expected one of "
                f"{sorted(BACKBONE_NAME2WIDTH)}"
            )
        self.name = name
        self.vision_width = BACKBONE_NAME2WIDTH[self.name]
        # NOTE(review): hidden_size mirrors the stage-0 embedding dim, not
        # vision_width — presumably intentional; confirm against consumers.
        self.hidden_size = self.embed_dims

        super().__init__(**kwargs)
|
|