| { | |
| "model_type": "sam", | |
| "architectures": [ | |
| "SamForSegmentation" | |
| ], | |
| "initializer_range": 0.02, | |
| "mask_decoder_config": { | |
| "mask_input_channels": 16, | |
| "hidden_size": 256, | |
| "iou_head_depth": 3, | |
| "iou_head_hidden_dim": 256, | |
| "num_multimask_outputs": 3 | |
| }, | |
| "prompt_encoder_config": { | |
| "embed_dim": 256, | |
| "image_embedding_size": [ | |
| 64, | |
| 64 | |
| ], | |
| "input_image_size": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "mask_in_channels": [ | |
| 16, | |
| 32, | |
| 64, | |
| 128 | |
| ], | |
| "no_mask_embed": 256, | |
| "num_point_embeddings": 4 | |
| }, | |
| "image_encoder": { | |
| "type": "tiny_vit", | |
| "embed_dim": 96, | |
| "depth": [ | |
| 2, | |
| 2, | |
| 4, | |
| 2 | |
| ], | |
| "num_heads": [ | |
| 3, | |
| 6, | |
| 12, | |
| 24 | |
| ], | |
| "window_size": 7, | |
| "mlp_ratio": 4.0, | |
| "drop_path_rate": 0.1, | |
| "img_size": 1024, | |
| "patch_size": 16, | |
| "out_chans": 256 | |
| }, | |
| "image_size": 1024, | |
| "num_classes": 1 | |
| } |