| {"img_size": 1024, "encoder_patch_embed_dim": 192, "encoder_num_heads": 3, "encoder_patch_size": 16, "encoder_depth": 12, "encoder_mlp_ratio": 4.0, "encoder_neck_dims": [256, 256], "decoder_max_num_input_points": 6, "decoder_transformer_depth": 2, "decoder_transformer_mlp_dim": 2048, "decoder_num_heads": 8, "decoder_upscaling_layer_dims": [64, 32], "num_multimask_outputs": 3, "iou_head_depth": 3, "iou_head_hidden_dim": 256, "activation": "gelu", "normalization_type": "layer_norm", "normalize_before_activation": false, "pixel_mean": [0.485, 0.456, 0.406], "pixel_std": [0.229, 0.224, 0.225]} |