Upload SegformerForSemanticSegmentation
Browse files- config.json +3 -56
- model.py +63 -3
config.json
CHANGED
|
@@ -2,27 +2,10 @@
|
|
| 2 |
"architectures": [
|
| 3 |
"SegformerForSemanticSegmentation"
|
| 4 |
],
|
| 5 |
-
"attention_probs_dropout_prob": 0.0,
|
| 6 |
"auto_map": {
|
|
|
|
| 7 |
"AutoModelForImageSegmentation": "model.SegformerForSemanticSegmentation"
|
| 8 |
},
|
| 9 |
-
"classifier_dropout_prob": 0.1,
|
| 10 |
-
"decoder_hidden_size": 256,
|
| 11 |
-
"depths": [
|
| 12 |
-
2,
|
| 13 |
-
2,
|
| 14 |
-
2,
|
| 15 |
-
2
|
| 16 |
-
],
|
| 17 |
-
"drop_path_rate": 0.1,
|
| 18 |
-
"hidden_act": "gelu",
|
| 19 |
-
"hidden_dropout_prob": 0.0,
|
| 20 |
-
"hidden_sizes": [
|
| 21 |
-
32,
|
| 22 |
-
64,
|
| 23 |
-
160,
|
| 24 |
-
256
|
| 25 |
-
],
|
| 26 |
"id2label": {
|
| 27 |
"0": "skin",
|
| 28 |
"1": "l_brow",
|
|
@@ -43,7 +26,6 @@
|
|
| 43 |
"16": "hair",
|
| 44 |
"17": "hat"
|
| 45 |
},
|
| 46 |
-
"initializer_range": 0.02,
|
| 47 |
"label2id": {
|
| 48 |
"cloth": 15,
|
| 49 |
"ear_r": 8,
|
|
@@ -64,43 +46,8 @@
|
|
| 64 |
"skin": 0,
|
| 65 |
"u_lip": 11
|
| 66 |
},
|
| 67 |
-
"
|
| 68 |
-
"mlp_ratios": [
|
| 69 |
-
4,
|
| 70 |
-
4,
|
| 71 |
-
4,
|
| 72 |
-
4
|
| 73 |
-
],
|
| 74 |
-
"model_type": "segformer",
|
| 75 |
-
"num_attention_heads": [
|
| 76 |
-
1,
|
| 77 |
-
2,
|
| 78 |
-
5,
|
| 79 |
-
8
|
| 80 |
-
],
|
| 81 |
-
"num_channels": 3,
|
| 82 |
"num_classes": 18,
|
| 83 |
-
"num_encoder_blocks": 4,
|
| 84 |
-
"patch_sizes": [
|
| 85 |
-
7,
|
| 86 |
-
3,
|
| 87 |
-
3,
|
| 88 |
-
3
|
| 89 |
-
],
|
| 90 |
-
"reshape_last_stage": true,
|
| 91 |
-
"semantic_loss_ignore_index": 255,
|
| 92 |
-
"sr_ratios": [
|
| 93 |
-
8,
|
| 94 |
-
4,
|
| 95 |
-
2,
|
| 96 |
-
1
|
| 97 |
-
],
|
| 98 |
-
"strides": [
|
| 99 |
-
4,
|
| 100 |
-
2,
|
| 101 |
-
2,
|
| 102 |
-
2
|
| 103 |
-
],
|
| 104 |
"torch_dtype": "float32",
|
| 105 |
-
"transformers_version": "4.
|
| 106 |
}
|
|
|
|
| 2 |
"architectures": [
|
| 3 |
"SegformerForSemanticSegmentation"
|
| 4 |
],
|
|
|
|
| 5 |
"auto_map": {
|
| 6 |
+
"AutoConfig": "model.FaceSegmenterConfig",
|
| 7 |
"AutoModelForImageSegmentation": "model.SegformerForSemanticSegmentation"
|
| 8 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
"id2label": {
|
| 10 |
"0": "skin",
|
| 11 |
"1": "l_brow",
|
|
|
|
| 26 |
"16": "hair",
|
| 27 |
"17": "hat"
|
| 28 |
},
|
|
|
|
| 29 |
"label2id": {
|
| 30 |
"cloth": 15,
|
| 31 |
"ear_r": 8,
|
|
|
|
| 46 |
"skin": 0,
|
| 47 |
"u_lip": 11
|
| 48 |
},
|
| 49 |
+
"model_type": "image-segmentation",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
"num_classes": 18,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
"torch_dtype": "float32",
|
| 52 |
+
"transformers_version": "4.37.0"
|
| 53 |
}
|
model.py
CHANGED
|
@@ -4,6 +4,65 @@ from torch import nn
|
|
| 4 |
from transformers.modeling_outputs import SemanticSegmenterOutput
|
| 5 |
|
| 6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
def encode_down(c_in: int, c_out: int):
|
| 8 |
return nn.Sequential(
|
| 9 |
nn.Conv2d(in_channels=c_in, out_channels=c_out, kernel_size=3, padding=1),
|
|
@@ -28,7 +87,7 @@ class FaceUNet(nn.Module):
|
|
| 28 |
def __init__(self, num_classes: int):
|
| 29 |
super().__init__()
|
| 30 |
self.num_classes = num_classes
|
| 31 |
-
|
| 32 |
self.down_1 = nn.Conv2d(
|
| 33 |
in_channels=3,
|
| 34 |
out_channels=64,
|
|
@@ -42,6 +101,7 @@ class FaceUNet(nn.Module):
|
|
| 42 |
|
| 43 |
self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
|
| 44 |
|
|
|
|
| 45 |
self.up_1 = decode_up(1024)
|
| 46 |
self.up_c1 = encode_down(1024, 512)
|
| 47 |
self.up_2 = decode_up(512)
|
|
@@ -83,7 +143,7 @@ class FaceUNet(nn.Module):
|
|
| 83 |
|
| 84 |
|
| 85 |
class Segformer(transformers.PreTrainedModel):
|
| 86 |
-
config_class =
|
| 87 |
|
| 88 |
def __init__(self, config):
|
| 89 |
super().__init__(config)
|
|
@@ -95,7 +155,7 @@ class Segformer(transformers.PreTrainedModel):
|
|
| 95 |
|
| 96 |
|
| 97 |
class SegformerForSemanticSegmentation(transformers.PreTrainedModel):
|
| 98 |
-
config_class =
|
| 99 |
|
| 100 |
def __init__(self, config):
|
| 101 |
super().__init__(config)
|
|
|
|
| 4 |
from transformers.modeling_outputs import SemanticSegmenterOutput
|
| 5 |
|
| 6 |
|
| 7 |
+
class FaceSegmenterConfig(transformers.PretrainedConfig):
    """Configuration for the face-segmentation model.

    Carries the 18 face-parsing class labels (skin, brows, eyes, ears,
    nose, mouth, lips, neck, cloth, hair, hat, ...) as ``id2label`` /
    ``label2id`` maps plus ``num_classes``.

    Keyword Args:
        id2label (dict, optional): mapping of class id -> label name.
            Keys are normalized to ``int`` because JSON round-trips (and
            pipeline loading) turn them into strings.
        label2id (dict, optional): inverse mapping of label name -> id.
        num_classes (int, optional): number of classes; defaults to
            ``len(id2label)``.
    """

    model_type = "image-segmentation"

    # Class-level defaults; treated as read-only templates (instances
    # always work on copies, see __init__).
    _id2label = {
        0: "skin",
        1: "l_brow",
        2: "r_brow",
        3: "l_eye",
        4: "r_eye",
        5: "eye_g",
        6: "l_ear",
        7: "r_ear",
        8: "ear_r",
        9: "nose",
        10: "mouth",
        11: "u_lip",
        12: "l_lip",
        13: "neck",
        14: "neck_l",
        15: "cloth",
        16: "hair",
        17: "hat",
    }

    _label2id = {
        "skin": 0,
        "l_brow": 1,
        "r_brow": 2,
        "l_eye": 3,
        "r_eye": 4,
        "eye_g": 5,
        "l_ear": 6,
        "r_ear": 7,
        "ear_r": 8,
        "nose": 9,
        "mouth": 10,
        "u_lip": 11,
        "l_lip": 12,
        "neck": 13,
        "neck_l": 14,
        "cloth": 15,
        "hair": 16,
        "hat": 17,
    }

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        # Copy before normalizing: the original code aliased the
        # class-level _id2label when the kwarg was absent and then
        # popped/reinserted keys in place, silently rewriting the shared
        # default dict for every instance.
        id2label = dict(kwargs.get("id2label", self._id2label))

        # For some reason the int keys are getting converted to strings
        # when used in pipelines; normalize them back to ints.
        self.id2label = {int(label_id): label for label_id, label in id2label.items()}

        # Copy here too so instances never share (or mutate) _label2id.
        self.label2id = dict(kwargs.get("label2id", self._label2id))
        self.num_classes = kwargs.get("num_classes", len(self.id2label))
|
| 64 |
+
|
| 65 |
+
|
| 66 |
def encode_down(c_in: int, c_out: int):
|
| 67 |
return nn.Sequential(
|
| 68 |
nn.Conv2d(in_channels=c_in, out_channels=c_out, kernel_size=3, padding=1),
|
|
|
|
| 87 |
def __init__(self, num_classes: int):
|
| 88 |
super().__init__()
|
| 89 |
self.num_classes = num_classes
|
| 90 |
+
# unet
|
| 91 |
self.down_1 = nn.Conv2d(
|
| 92 |
in_channels=3,
|
| 93 |
out_channels=64,
|
|
|
|
| 101 |
|
| 102 |
self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
|
| 103 |
|
| 104 |
+
            # Below, `in_channels` again becomes 1024 as we are concatenating.
|
| 105 |
self.up_1 = decode_up(1024)
|
| 106 |
self.up_c1 = encode_down(1024, 512)
|
| 107 |
self.up_2 = decode_up(512)
|
|
|
|
| 143 |
|
| 144 |
|
| 145 |
class Segformer(transformers.PreTrainedModel):
|
| 146 |
+
config_class = FaceSegmenterConfig
|
| 147 |
|
| 148 |
def __init__(self, config):
|
| 149 |
super().__init__(config)
|
|
|
|
| 155 |
|
| 156 |
|
| 157 |
class SegformerForSemanticSegmentation(transformers.PreTrainedModel):
|
| 158 |
+
config_class = FaceSegmenterConfig
|
| 159 |
|
| 160 |
def __init__(self, config):
|
| 161 |
super().__init__(config)
|