Add Uniformer (segmentation) models
Browse files
Derived from https://github.com/Sense-X/UniFormer/
uniformer_base/README.md
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
Originally from https://github.com/Sense-X/UniFormer; converted to safetensors, with the config flattened for inference. Used under Apache-2.0.
|
uniformer_base/test_config_g.py
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
| 2 |
+
model = dict(
|
| 3 |
+
type='EncoderDecoder',
|
| 4 |
+
pretrained=None,
|
| 5 |
+
backbone=dict(
|
| 6 |
+
type='UniFormer',
|
| 7 |
+
embed_dim=[64, 128, 320, 512],
|
| 8 |
+
layers=[5, 8, 20, 7],
|
| 9 |
+
head_dim=64,
|
| 10 |
+
mlp_ratio=4.,
|
| 11 |
+
qkv_bias=True,
|
| 12 |
+
drop_rate=0.,
|
| 13 |
+
attn_drop_rate=0.,
|
| 14 |
+
drop_path_rate=0.4,
|
| 15 |
+
windows=False,
|
| 16 |
+
hybrid=False),
|
| 17 |
+
decode_head=dict(
|
| 18 |
+
type='UPerHead',
|
| 19 |
+
in_channels=[64, 128, 320, 512],
|
| 20 |
+
in_index=[0, 1, 2, 3],
|
| 21 |
+
pool_scales=(1, 2, 3, 6),
|
| 22 |
+
channels=512,
|
| 23 |
+
dropout_ratio=0.1,
|
| 24 |
+
num_classes=150,
|
| 25 |
+
norm_cfg=norm_cfg,
|
| 26 |
+
align_corners=False,
|
| 27 |
+
loss_decode=dict(
|
| 28 |
+
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
| 29 |
+
auxiliary_head=dict(
|
| 30 |
+
type='FCNHead',
|
| 31 |
+
in_channels=320,
|
| 32 |
+
in_index=2,
|
| 33 |
+
channels=256,
|
| 34 |
+
num_convs=1,
|
| 35 |
+
concat_input=False,
|
| 36 |
+
dropout_ratio=0.1,
|
| 37 |
+
num_classes=150,
|
| 38 |
+
norm_cfg=norm_cfg,
|
| 39 |
+
align_corners=False,
|
| 40 |
+
loss_decode=dict(
|
| 41 |
+
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
| 42 |
+
# model training and testing settings
|
| 43 |
+
train_cfg=dict(),
|
| 44 |
+
test_cfg=dict(mode='whole'))
|
| 45 |
+
|
| 46 |
+
# dataset settings
|
| 47 |
+
dataset_type = 'ADE20KDataset'
|
| 48 |
+
data_root = 'data/ade/ADEChallengeData2016'
|
| 49 |
+
img_norm_cfg = dict(
|
| 50 |
+
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
| 51 |
+
crop_size = (512, 512)
|
| 52 |
+
|
| 53 |
+
test_pipeline = [
|
| 54 |
+
dict(type='LoadImageFromFile'),
|
| 55 |
+
dict(
|
| 56 |
+
type='MultiScaleFlipAug',
|
| 57 |
+
img_scale=(2048, 512),
|
| 58 |
+
# img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
|
| 59 |
+
flip=False,
|
| 60 |
+
transforms=[
|
| 61 |
+
dict(type='Resize', keep_ratio=True),
|
| 62 |
+
dict(type='RandomFlip'),
|
| 63 |
+
dict(type='Normalize', **img_norm_cfg),
|
| 64 |
+
dict(type='ImageToTensor', keys=['img']),
|
| 65 |
+
dict(type='Collect', keys=['img']),
|
| 66 |
+
])
|
| 67 |
+
]
|
| 68 |
+
|
| 69 |
+
data = dict(
|
| 70 |
+
samples_per_gpu=2,
|
| 71 |
+
workers_per_gpu=4,
|
| 72 |
+
test=dict(
|
| 73 |
+
type=dataset_type,
|
| 74 |
+
data_root=data_root,
|
| 75 |
+
img_dir='images/validation',
|
| 76 |
+
ann_dir='annotations/validation',
|
| 77 |
+
pipeline=test_pipeline))
|
uniformer_base/upernet_global_base.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b29aa3cb2a5765d61a56b573f36e2fbd59ac24cc78e1662b49ce898bd5e40758
|
| 3 |
+
size 319165833
|
uniformer_small/README.md
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
Originally from https://github.com/Sense-X/UniFormer; converted to safetensors, with the config flattened for inference. Used under Apache-2.0.
|
uniformer_small/test_config_g.py
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# model settings
|
| 2 |
+
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
| 3 |
+
model = dict(
|
| 4 |
+
type='EncoderDecoder',
|
| 5 |
+
pretrained=None,
|
| 6 |
+
backbone=dict(
|
| 7 |
+
type='UniFormer',
|
| 8 |
+
embed_dim=[64, 128, 320, 512],
|
| 9 |
+
layers=[3, 4, 8, 3],
|
| 10 |
+
head_dim=64,
|
| 11 |
+
mlp_ratio=4.,
|
| 12 |
+
qkv_bias=True,
|
| 13 |
+
drop_rate=0.,
|
| 14 |
+
attn_drop_rate=0.,
|
| 15 |
+
drop_path_rate=0.25,
|
| 16 |
+
windows=False,
|
| 17 |
+
hybrid=False),
|
| 18 |
+
decode_head=dict(
|
| 19 |
+
type='UPerHead',
|
| 20 |
+
in_channels=[64, 128, 320, 512],
|
| 21 |
+
in_index=[0, 1, 2, 3],
|
| 22 |
+
pool_scales=(1, 2, 3, 6),
|
| 23 |
+
channels=512,
|
| 24 |
+
dropout_ratio=0.1,
|
| 25 |
+
num_classes=150,
|
| 26 |
+
norm_cfg=norm_cfg,
|
| 27 |
+
align_corners=False,
|
| 28 |
+
loss_decode=dict(
|
| 29 |
+
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
| 30 |
+
auxiliary_head=dict(
|
| 31 |
+
type='FCNHead',
|
| 32 |
+
in_channels=320,
|
| 33 |
+
in_index=2,
|
| 34 |
+
channels=256,
|
| 35 |
+
num_convs=1,
|
| 36 |
+
concat_input=False,
|
| 37 |
+
dropout_ratio=0.1,
|
| 38 |
+
num_classes=150,
|
| 39 |
+
norm_cfg=norm_cfg,
|
| 40 |
+
align_corners=False,
|
| 41 |
+
loss_decode=dict(
|
| 42 |
+
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
| 43 |
+
# model training and testing settings
|
| 44 |
+
train_cfg=dict(),
|
| 45 |
+
test_cfg=dict(mode='whole'))
|
| 46 |
+
|
| 47 |
+
# dataset settings
|
| 48 |
+
dataset_type = 'ADE20KDataset'
|
| 49 |
+
data_root = 'data/ade/ADEChallengeData2016'
|
| 50 |
+
img_norm_cfg = dict(
|
| 51 |
+
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
| 52 |
+
crop_size = (512, 512)
|
| 53 |
+
|
| 54 |
+
test_pipeline = [
|
| 55 |
+
dict(type='LoadImageFromFile'),
|
| 56 |
+
dict(
|
| 57 |
+
type='MultiScaleFlipAug',
|
| 58 |
+
img_scale=(2048, 512),
|
| 59 |
+
# img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
|
| 60 |
+
flip=False,
|
| 61 |
+
transforms=[
|
| 62 |
+
dict(type='Resize', keep_ratio=True),
|
| 63 |
+
dict(type='RandomFlip'),
|
| 64 |
+
dict(type='Normalize', **img_norm_cfg),
|
| 65 |
+
dict(type='ImageToTensor', keys=['img']),
|
| 66 |
+
dict(type='Collect', keys=['img']),
|
| 67 |
+
])
|
| 68 |
+
]
|
| 69 |
+
|
| 70 |
+
data = dict(
|
| 71 |
+
samples_per_gpu=2,
|
| 72 |
+
workers_per_gpu=4,
|
| 73 |
+
test=dict(
|
| 74 |
+
type=dataset_type,
|
| 75 |
+
data_root=data_root,
|
| 76 |
+
img_dir='images/validation',
|
| 77 |
+
ann_dir='annotations/validation',
|
| 78 |
+
pipeline=test_pipeline))
|
uniformer_small/upernet_global_small.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bae1398435fd2ef9da3bdec5031211cb71b515a97651ccfbac83987f6f575d5d
|
| 3 |
+
size 206193755
|