Upload model
Browse files- config.json +1 -1
- configuration_uniformer.py +51 -0
- modelling_uniformer.py +4 -3
config.json
CHANGED
|
@@ -50,5 +50,5 @@
|
|
| 50 |
"qkv_bias": true,
|
| 51 |
"representation_size": null,
|
| 52 |
"torch_dtype": "float32",
|
| 53 |
-
"transformers_version": "4.
|
| 54 |
}
|
|
|
|
| 50 |
"qkv_bias": true,
|
| 51 |
"representation_size": null,
|
| 52 |
"torch_dtype": "float32",
|
| 53 |
+
"transformers_version": "4.39.3"
|
| 54 |
}
|
configuration_uniformer.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from transformers import PretrainedConfig
|
| 2 |
+
from transformers.utils import logging
|
| 3 |
+
|
| 4 |
+
# Module-level logger, obtained from the transformers logging utilities and
# named after this module.
logger = logging.get_logger(__name__)
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class UniFormerWithProjectionHeadConfig(PretrainedConfig):
    """Configuration for a UniFormer model with an optional projection head.

    Every constructor argument is stored on the instance under the same name.
    ``layer_norm_eps``, ``image_size`` and ``qkv_bias`` are not assigned here;
    they are forwarded to ``PretrainedConfig.__init__`` as keyword arguments
    (NOTE(review): the base class is expected to set unknown keyword arguments
    as attributes -- confirm against the pinned transformers version).

    Args:
        projection_size: Stored as ``self.projection_size``; presumably the
            output width of the projection head, ``None`` by default.
        embed_dim: List stored as ``self.embed_dim``. Defaults to
            ``[64, 128, 320, 512]``.
        image_size: Forwarded to the base class. Defaults to ``384``.
        in_chans: Stored as ``self.in_chans``. Defaults to ``3``.
        depth: List stored as ``self.depth``. Defaults to ``[5, 8, 20, 7]``.
        patch_size: List stored as ``self.patch_size``. Defaults to
            ``[4, 2, 2, 2]``.
        head_dim: Stored as ``self.head_dim``. Defaults to ``64``.
        mlp_ratio: Stored as ``self.mlp_ratio``. Defaults to ``4``.
        qkv_bias: Forwarded to the base class. Defaults to ``True``.
        num_classes: Stored as ``self.num_classes``. Defaults to ``1000``.
        qk_scale: Stored as ``self.qk_scale``. Defaults to ``None``.
        representation_size: Stored as ``self.representation_size``.
            Defaults to ``None``.
        drop_rate: Stored as ``self.drop_rate``. Defaults to ``0.0``.
        drop_path_rate: Stored as ``self.drop_path_rate``. Defaults to ``0.3``.
        attn_drop_rate: Stored as ``self.attn_drop_rate``. Defaults to ``0.0``.
        conv_stem: Stored as ``self.conv_stem``. Defaults to ``False``.
        layer_norm_eps: Forwarded to the base class. Defaults to ``1e-6``.
        **kwargs: Passed through unchanged to ``PretrainedConfig.__init__``.
    """

    model_type = 'uniformer'

    def __init__(
        self,
        projection_size=None,
        embed_dim=None,
        image_size=384,
        in_chans=3,
        depth=None,
        patch_size=None,
        head_dim=64,
        mlp_ratio=4,
        qkv_bias=True,
        num_classes=1000,
        qk_scale=None,
        representation_size=None,
        drop_rate=0.0,
        drop_path_rate=0.3,
        attn_drop_rate=0.0,
        conv_stem=False,
        layer_norm_eps=1e-6,
        **kwargs,
    ):
        super().__init__(
            layer_norm_eps=layer_norm_eps,
            image_size=image_size,
            qkv_bias=qkv_bias,
            **kwargs,
        )

        # The list-valued defaults are built per instance. A list literal in
        # the signature would be shared by every config created without that
        # argument, so an in-place edit on one config would leak into others.
        if embed_dim is None:
            embed_dim = [64, 128, 320, 512]
        if depth is None:
            depth = [5, 8, 20, 7]
        if patch_size is None:
            patch_size = [4, 2, 2, 2]

        self.projection_size = projection_size
        self.embed_dim = embed_dim
        self.in_chans = in_chans
        self.depth = depth
        self.patch_size = patch_size
        self.head_dim = head_dim
        self.mlp_ratio = mlp_ratio
        self.num_classes = num_classes
        self.qk_scale = qk_scale
        self.representation_size = representation_size
        self.drop_rate = drop_rate
        self.drop_path_rate = drop_path_rate
        self.attn_drop_rate = attn_drop_rate
        self.conv_stem = conv_stem
|
modelling_uniformer.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
from collections import OrderedDict
|
| 2 |
from functools import partial
|
| 3 |
-
from typing import Optional, Tuple, Union
|
| 4 |
from math import isqrt
|
|
|
|
| 5 |
|
| 6 |
import torch
|
| 7 |
import torch.nn as nn
|
|
@@ -11,6 +11,8 @@ from transformers.modeling_outputs import ModelOutput
|
|
| 11 |
from transformers.modeling_utils import PreTrainedModel
|
| 12 |
from transformers.utils import logging
|
| 13 |
|
|
|
|
|
|
|
| 14 |
logger = logging.get_logger(__name__)
|
| 15 |
|
| 16 |
|
|
@@ -293,8 +295,7 @@ class UniFormerPreTrainedModel(PreTrainedModel):
|
|
| 293 |
models.
|
| 294 |
"""
|
| 295 |
|
| 296 |
-
config_class =
|
| 297 |
-
base_model_prefix = "vit"
|
| 298 |
main_input_name = "pixel_values"
|
| 299 |
|
| 300 |
def _init_weights(self, m):
|
|
|
|
| 1 |
from collections import OrderedDict
|
| 2 |
from functools import partial
|
|
|
|
| 3 |
from math import isqrt
|
| 4 |
+
from typing import Optional, Tuple, Union
|
| 5 |
|
| 6 |
import torch
|
| 7 |
import torch.nn as nn
|
|
|
|
| 11 |
from transformers.modeling_utils import PreTrainedModel
|
| 12 |
from transformers.utils import logging
|
| 13 |
|
| 14 |
+
from .configuration_uniformer import UniFormerWithProjectionHeadConfig
|
| 15 |
+
|
| 16 |
logger = logging.get_logger(__name__)
|
| 17 |
|
| 18 |
|
|
|
|
| 295 |
models.
|
| 296 |
"""
|
| 297 |
|
| 298 |
+
config_class = UniFormerWithProjectionHeadConfig
|
|
|
|
| 299 |
main_input_name = "pixel_values"
|
| 300 |
|
| 301 |
def _init_weights(self, m):
|