Add fine-tuned MaskFormer model with CVAT compatibility

Browse files

Files changed (4) hide show

README.md +55 -0
config.json +91 -0
model.safetensors +3 -0
preprocessor_config.json +20 -0

README.md ADDED Viewed

	@@ -0,0 +1,55 @@

+---
+license: apache-2.0
+tags:
+- maskformer
+- instance-segmentation
+- image-segmentation
+- abnormal-detection
+datasets:
+- custom
+pipeline_tag: image-segmentation
+---
+# MaskFormer for Normal/Abnormal Detection
+This model is fine-tuned to detect and segment regions classified as either "Normal" or "Abnormal".
+## Model description
+This is a MaskFormer model fine-tuned on a custom dataset with polygon annotations in COCO format. It has two classes:
+- Normal (ID: 0)
+- Abnormal (ID: 1)
+## Intended uses & limitations
+This model is intended for instance segmentation tasks to identify normal and abnormal regions in images.
+### How to use in CVAT
+1. In CVAT, go to Models → Add Model
+2. Select Hugging Face as the source
+3. Enter the model path: "{your-username}/maskformer-abnormal-detection-v4" (replace `{your-username}` with the Hugging Face account that hosts this model)
+4. Configure the appropriate mapping for your labels (Normal and Abnormal)
+### Usage in Python
+```python
+from transformers import MaskFormerForInstanceSegmentation, MaskFormerImageProcessor
+import torch
+from PIL import Image
+# Load model and processor
+model = MaskFormerForInstanceSegmentation.from_pretrained("{your-username}/maskformer-abnormal-detection-v4")
+processor = MaskFormerImageProcessor.from_pretrained("facebook/maskformer-swin-tiny-ade")
+# Prepare image
+image = Image.open("your_image.jpg")
+inputs = processor(images=image, return_tensors="pt")
+# Make prediction
+with torch.no_grad():
+    outputs = model(**inputs)
+# Process outputs for visualization
+# (see example code in model repository)
+```

config.json ADDED Viewed

	@@ -0,0 +1,91 @@

+{
+  "_name_or_path": "./maskformer_finetuned",
+  "architectures": [
+    "MaskFormerForInstanceSegmentation"
+  ],
+  "backbone": null,
+  "backbone_config": {
+    "depths": [
+      2,
+      2,
+      6,
+      2
+    ],
+    "drop_path_rate": 0.3,
+    "embed_dim": 96,
+    "encoder_stride": 32,
+    "hidden_size": 768,
+    "image_size": 224,
+    "in_channels": 3,
+    "model_type": "maskformer-swin",
+    "num_heads": [
+      3,
+      6,
+      12,
+      24
+    ],
+    "out_features": [
+      "stage1",
+      "stage2",
+      "stage3",
+      "stage4"
+    ],
+    "out_indices": [
+      1,
+      2,
+      3,
+      4
+    ],
+    "path_norm": true,
+    "pretrain_img_size": 224,
+    "window_size": 7
+  },
+  "backbone_kwargs": null,
+  "ce_weight": 1.0,
+  "cross_entropy_weight": 1.0,
+  "decoder_config": {
+    "max_position_embeddings": 1024,
+    "model_type": "detr",
+    "scale_embedding": false
+  },
+  "dice_weight": 1.0,
+  "fpn_feature_size": 256,
+  "init_std": 0.02,
+  "init_xavier_std": 1.0,
+  "mask_feature_size": 256,
+  "mask_weight": 20.0,
+  "model_type": "maskformer",
+  "no_object_weight": 0.1,
+  "num_attention_heads": 8,
+  "num_hidden_layers": 6,
+  "num_queries": 100,
+  "output_auxiliary_logits": null,
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "use_auxiliary_loss": false,
+  "use_pretrained_backbone": false,
+  "use_timm_backbone": false,
+  "pipeline_tag": "image-segmentation",
+  "id2label": {
+    "0": "Normal",
+    "1": "Abnormal"
+  },
+  "label2id": {
+    "Normal": 0,
+    "Abnormal": 1
+  },
+  "num_labels": 2,
+  "task_specific_params": {
+    "image-segmentation": {
+      "num_labels": 2,
+      "id2label": {
+        "0": "Normal",
+        "1": "Abnormal"
+      },
+      "label2id": {
+        "Normal": 0,
+        "Abnormal": 1
+      }
+    }
+  }
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d23f9a5a47177b2451f74996de9fde955600ef82e2f4cfec7204f9512d5c789c
+size 167175760

preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,20 @@

+{
+  "do_normalize": true,
+  "do_resize": true,
+  "feature_extractor_type": "MaskFormerFeatureExtractor",
+  "image_mean": [
+    0.485,
+    0.456,
+    0.406
+  ],
+  "image_std": [
+    0.229,
+    0.224,
+    0.225
+  ],
+  "reduce_labels": false,
+  "size": {
+    "height": 512,
+    "width": 512
+  }
+}