{
  "model_type": "sam",
  "architectures": ["SamForSegmentation"],
  "initializer_range": 0.02,
  "mask_decoder_config": {
    "mask_input_channels": 16,
    "hidden_size": 256,
    "iou_head_depth": 3,
    "iou_head_hidden_dim": 256,
    "num_multimask_outputs": 3
  },
  "prompt_encoder_config": {
    "embed_dim": 256,
    "image_embedding_size": [64, 64],
    "input_image_size": [1024, 1024],
    "mask_in_channels": [16, 32, 64, 128],
    "no_mask_embed": 256,
    "num_point_embeddings": 4
  },
  "image_encoder": {
    "type": "tiny_vit",
    "embed_dim": 96,
    "depth": [2, 2, 4, 2],
    "num_heads": [3, 6, 12, 24],
    "window_size": 7,
    "mlp_ratio": 4.0,
    "drop_path_rate": 0.1,
    "img_size": 1024,
    "patch_size": 16,
    "out_chans": 256
  },
  "image_size": 1024,
  "num_classes": 1
}