Upload model
Browse files- config.json +26 -0
- configuration_vitmodel.py +39 -0
- modeling_vitmodel.py +16 -0
- pytorch_model.bin +3 -0
config.json
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"VitMemModel"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.0,
|
| 6 |
+
"auto_map": {
|
| 7 |
+
"AutoConfig": "configuration_vitmodel.ViTConfig",
|
| 8 |
+
"AutoModel": "modeling_vitmodel.VitMemModel"
|
| 9 |
+
},
|
| 10 |
+
"encoder_stride": 16,
|
| 11 |
+
"hidden_act": "gelu",
|
| 12 |
+
"hidden_dropout_prob": 0.0,
|
| 13 |
+
"hidden_size": 768,
|
| 14 |
+
"image_size": 224,
|
| 15 |
+
"initializer_range": 0.02,
|
| 16 |
+
"intermediate_size": 3072,
|
| 17 |
+
"layer_norm_eps": 1e-12,
|
| 18 |
+
"model_type": "vit",
|
| 19 |
+
"num_attention_heads": 12,
|
| 20 |
+
"num_channels": 3,
|
| 21 |
+
"num_hidden_layers": 12,
|
| 22 |
+
"patch_size": 16,
|
| 23 |
+
"qkv_bias": true,
|
| 24 |
+
"torch_dtype": "float32",
|
| 25 |
+
"transformers_version": "4.30.1"
|
| 26 |
+
}
|
configuration_vitmodel.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from transformers import PretrainedConfig
|
| 2 |
+
|
| 3 |
+
class ViTConfig(PretrainedConfig):
    """Configuration for the custom ViT model (``VitMemModel``).

    Holds the standard ViT hyper-parameters; every default matches the
    ViT-Base/16 setup (768-d hidden, 12 layers, 12 heads, 224px input,
    16px patches), in line with the values stored in config.json.
    """

    # Registered model type; matches "model_type" in config.json.
    model_type = "vit"

    def __init__(
        self,
        hidden_size=768,
        num_hidden_layers=12,
        num_attention_heads=12,
        intermediate_size=3072,
        hidden_act="gelu",
        hidden_dropout_prob=0.0,
        attention_probs_dropout_prob=0.0,
        initializer_range=0.02,
        layer_norm_eps=1e-12,
        image_size=224,
        patch_size=16,
        num_channels=3,
        qkv_bias=True,
        encoder_stride=16,
        **kwargs,
    ):
        # Extra kwargs (architectures, auto_map, ...) are handled by the base class.
        super().__init__(**kwargs)

        # Bulk-assign the ViT hyper-parameters onto the config instance.
        for attr_name, attr_value in (
            ("hidden_size", hidden_size),
            ("num_hidden_layers", num_hidden_layers),
            ("num_attention_heads", num_attention_heads),
            ("intermediate_size", intermediate_size),
            ("hidden_act", hidden_act),
            ("hidden_dropout_prob", hidden_dropout_prob),
            ("attention_probs_dropout_prob", attention_probs_dropout_prob),
            ("initializer_range", initializer_range),
            ("layer_norm_eps", layer_norm_eps),
            ("image_size", image_size),
            ("patch_size", patch_size),
            ("num_channels", num_channels),
            ("qkv_bias", qkv_bias),
            ("encoder_stride", encoder_stride),
        ):
            setattr(self, attr_name, attr_value)
|
modeling_vitmodel.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import timm
|
| 3 |
+
from transformers import PreTrainedModel
|
| 4 |
+
from .configuration_vitmodel import ViTConfig
|
| 5 |
+
|
| 6 |
+
class VitMemModel(PreTrainedModel):
    """Hugging Face wrapper around a timm ViT-B/16 backbone with one output unit.

    The forward pass maps an image batch to a scalar in (0, 1) per image
    via a sigmoid over the backbone's single-logit head.
    """

    config_class = ViTConfig

    def __init__(self, config: ViTConfig):
        super().__init__(config)
        # Backbone is instantiated without timm's pretrained weights; the
        # real weights are loaded afterwards from pytorch_model.bin by
        # ``from_pretrained`` on this wrapper.
        self.model = timm.create_model(
            "vit_base_patch16_224_miil",
            pretrained=False,
            num_classes=1,
        )

    def forward(self, tensor, labels=None):
        # NOTE(review): `labels` is accepted for pipeline/Trainer signature
        # compatibility but is not used — no loss is computed here.
        logits = self.model(tensor)
        return torch.sigmoid(logits)
|
pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:acb63c96d24c2ca7347982300c11360053a3f93b64dfa9b18057e0c516aef6dc
|
| 3 |
+
size 343130109
|