# Source: StomataPy400K-Models / StomataPy400K_aperture_512 / seg_rein_dinov2_mask2former.py
# Uploaded with the StomataPy400K model weights (commit 805d339, verified).
# Patience (in epochs) for the ReduceOnPlateauLR entry in param_scheduler below.
ReduceOnPlateauLR_patience = 50
# Albumentations augmentation chain for training; the same sequence is inlined
# again inside train_pipeline and train_dataloader further down this file.
albu_train_transforms = [
    # Pad up to at least 512x512 with constant borders (border_mode=0).
    dict(
        always_apply=True,
        border_mode=0,
        min_height=512,
        min_width=512,
        type='PadIfNeeded'),
    dict(always_apply=True, type='Flip'),
    # Full-range rotation; interpolation=4 is presumably cv2.INTER_LANCZOS4
    # (albumentations uses OpenCV interpolation flags) — confirm.
    dict(
        always_apply=True, interpolation=4, limit=(
            -180,
            180,
        ), type='Rotate'),
    # With p=0.5, OneOf picks one of two elastic deformations (the children's
    # p values act as relative selection weights).
    dict(
        p=0.5,
        transforms=[
            dict(
                alpha=20,
                approximate=True,
                border_mode=0,
                interpolation=4,
                mask_value=(
                    0,
                    0,
                    0,
                ),
                p=0.5,
                same_dxdy=True,
                sigma=15,
                type='ElasticTransform'),
            dict(
                alpha=40,
                approximate=True,
                border_mode=0,
                interpolation=4,
                mask_value=(
                    0,
                    0,
                    0,
                ),
                p=0.5,
                same_dxdy=False,
                sigma=15,
                type='ElasticTransform'),
        ],
        type='OneOf'),
    # Photometric jitter applied to every sample.
    dict(
        always_apply=True,
        brightness=0.2,
        contrast=0.1,
        hue=0.2,
        saturation=0.2,
        type='ColorJitter'),
    dict(p=0.5, type='AdvancedBlur'),
    # Final crop back to the fixed 512x512 model input size.
    dict(always_apply=True, height=512, type='CenterCrop', width=512),
]
# Automatic LR scaling is disabled; the hand-tuned lr defined below is used as-is.
auto_scale_lr = dict(enable=False, base_batch_size=16)
batch_size = 8

# Per-class cross-entropy weights: six semantic classes at 1.0 plus a seventh
# entry at 0.1 (presumably the Mask2Former "no object" class — see
# model.decode_head.loss_cls, which uses the same list).
class_weight = [1.0] * 6 + [0.1]

# Semantic classes predicted by the head (num_classes=6 in the model config).
classes = (
    'background',
    'stomatal complex',
    'stoma',
    'outer ledge',
    'pore',
    'pavement cell',
)

# Fixed model input resolution.
crop_size = (512, 512)

data_root = 'StomataPy400K_filtered_train/'
dataset_type = 'StomataDataset'
# Runtime hooks: best-mIoU checkpointing, early stopping after 150 stale
# epochs, per-epoch metric logging, and drawn segmentation visualizations.
default_hooks = dict(
    timer=dict(type='IterTimerHook'),
    logger=dict(type='LoggerHook', interval=600, log_metric_by_epoch=True),
    param_scheduler=dict(type='ParamSchedulerHook'),
    checkpoint=dict(
        type='CheckpointHook',
        by_epoch=True,
        # Huge interval disables periodic saves; only 'best' and 'last'
        # checkpoints are kept.
        interval=999999,
        save_best='mIoU',
        save_last=True),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    early_stopping=dict(
        type='EarlyStoppingHook',
        monitor='mIoU',
        rule='greater',
        patience=150),
    visualization=dict(type='SegVisualizationHook', draw=True, interval=50))
default_scope = 'mmseg'

dinov2_checkpoint = 'train/checkpoints/dinov2_converted.pth'
early_stopping_patience = 150

env_cfg = dict(
    cudnn_benchmark=True,
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
    dist_cfg=dict(backend='nccl'))

find_unused_parameters = True
fp16 = dict(loss_scale='dynamic')  # mixed precision with dynamic loss scaling
launcher = 'pytorch'
load_from = None
log_level = 'INFO'
log_processor = dict(by_epoch=True)
lr = 1e-05
max_epochs = 300
# EncoderDecoder: a DINOv2 ViT backbone (depth=24, embed_dim=1024,
# num_heads=16 — ViT-L/16 dimensions) fine-tuned through LoRA "Reins"
# adapters, decoded by a Mask2Former-style head.
model = dict(
    backbone=dict(
        block_chunks=0,
        depth=24,
        embed_dim=1024,
        ffn_bias=True,
        ffn_layer='mlp',
        img_size=512,
        init_cfg=dict(
            checkpoint='train/checkpoints/dinov2_converted.pth',
            type='Pretrained'),
        init_values=1e-05,
        mlp_ratio=4,
        num_heads=16,
        patch_size=16,
        proj_bias=True,
        qkv_bias=True,
        # LoRA adapters (rank 16) injected in all 24 layers; 100 learnable
        # tokens linked to the decoder queries.
        reins_config=dict(
            embed_dims=1024,
            link_token_to_query=True,
            lora_dim=16,
            num_layers=24,
            patch_size=16,
            token_length=100,
            type='LoRAReins'),
        type='ReinsDinoVisionTransformer'),
    # ImageNet mean/std normalization, BGR->RGB conversion, fixed 512x512
    # padding; 255 marks padded/ignored label pixels.
    data_preprocessor=dict(
        bgr_to_rgb=True,
        mean=[
            123.675,
            116.28,
            103.53,
        ],
        pad_val=0,
        seg_pad_val=255,
        size=(
            512,
            512,
        ),
        std=[
            58.395,
            57.12,
            57.375,
        ],
        type='SegDataPreProcessor'),
    decode_head=dict(
        align_corners=False,
        enforce_decoder_input_project=False,
        feat_channels=256,
        # Four 1024-channel feature maps tapped from the ViT backbone.
        in_channels=[
            1024,
            1024,
            1024,
            1024,
        ],
        # Classification loss over 6 classes; the 7th weight (0.1) presumably
        # down-weights the appended "no object" class — matches the top-level
        # class_weight variable.
        loss_cls=dict(
            class_weight=[
                1.0,
                1.0,
                1.0,
                1.0,
                1.0,
                1.0,
                0.1,
            ],
            loss_weight=2.0,
            reduction='mean',
            type='mmdet.CrossEntropyLoss',
            use_sigmoid=False),
        loss_dice=dict(
            activate=True,
            eps=1.0,
            loss_weight=5.0,
            naive_dice=True,
            reduction='mean',
            type='mmdet.DiceLoss',
            use_sigmoid=True),
        loss_mask=dict(
            loss_weight=5.0,
            reduction='mean',
            type='mmdet.CrossEntropyLoss',
            use_sigmoid=True),
        num_classes=6,
        num_queries=100,
        num_transformer_feat_level=3,
        out_channels=256,
        # Multi-scale deformable-attention pixel decoder (6 encoder layers).
        pixel_decoder=dict(
            act_cfg=dict(type='ReLU'),
            encoder=dict(
                init_cfg=None,
                layer_cfg=dict(
                    ffn_cfg=dict(
                        act_cfg=dict(inplace=True, type='ReLU'),
                        embed_dims=256,
                        feedforward_channels=1024,
                        ffn_drop=0.0,
                        num_fcs=2),
                    self_attn_cfg=dict(
                        batch_first=True,
                        dropout=0.0,
                        embed_dims=256,
                        im2col_step=64,
                        init_cfg=None,
                        norm_cfg=None,
                        num_heads=8,
                        num_levels=3,
                        num_points=4)),
                num_layers=6),
            init_cfg=None,
            norm_cfg=dict(num_groups=32, type='GN'),
            num_outs=3,
            positional_encoding=dict(normalize=True, num_feats=128),
            type='mmdet.MSDeformAttnPixelDecoder'),
        positional_encoding=dict(normalize=True, num_feats=128),
        replace_query_feat=True,
        strides=[
            4,
            8,
            16,
            32,
        ],
        # Hungarian matching between queries and GT masks; costs mirror the
        # three loss terms above (cls 2.0, mask CE 5.0, dice 5.0).
        train_cfg=dict(
            assigner=dict(
                match_costs=[
                    dict(type='mmdet.ClassificationCost', weight=2.0),
                    dict(
                        type='mmdet.CrossEntropyLossCost',
                        use_sigmoid=True,
                        weight=5.0),
                    dict(
                        eps=1.0,
                        pred_act=True,
                        type='mmdet.DiceCost',
                        weight=5.0),
                ],
                type='mmdet.HungarianAssigner'),
            importance_sample_ratio=0.75,
            num_points=12544,
            oversample_ratio=3.0,
            sampler=dict(type='mmdet.MaskPseudoSampler')),
        # 9-layer query decoder with intermediate outputs for aux supervision.
        transformer_decoder=dict(
            init_cfg=None,
            layer_cfg=dict(
                cross_attn_cfg=dict(
                    attn_drop=0.0,
                    batch_first=True,
                    dropout_layer=None,
                    embed_dims=256,
                    num_heads=8,
                    proj_drop=0.0),
                ffn_cfg=dict(
                    act_cfg=dict(inplace=True, type='ReLU'),
                    add_identity=True,
                    dropout_layer=None,
                    embed_dims=256,
                    feedforward_channels=2048,
                    ffn_drop=0.0,
                    num_fcs=2),
                self_attn_cfg=dict(
                    attn_drop=0.0,
                    batch_first=True,
                    dropout_layer=None,
                    embed_dims=256,
                    num_heads=8,
                    proj_drop=0.0)),
            num_layers=9,
            return_intermediate=True),
        type='ReinMask2FormerHead'),
    # NOTE(review): slide-mode stride (1364) exceeds crop_size (512), so
    # windows would not tile larger inputs; presumably all eval images are
    # resized to 512x512 (see val/test pipelines) so a single window covers
    # the image — confirm, otherwise pixels would be skipped.
    test_cfg=dict(crop_size=(
        512,
        512,
    ), mode='slide', stride=(
        1364,
        1364,
    )),
    # Empty here; Mask2Former's matching train_cfg lives in decode_head above.
    train_cfg=dict(),
    type='EncoderDecoder')
model_crop_size = (512, 512)
n_gpus = 4
num_classes = 6
num_workers = 16

# AdamW built through the PEFT-aware constructor; adapter tokens, query/level
# embeddings, norms, and Rein scales keep the base LR but skip weight decay.
optim_wrapper = dict(
    constructor='PEFTOptimWrapperConstructor',
    optimizer=dict(
        type='AdamW',
        lr=1e-05,
        betas=(0.9, 0.999),
        eps=1e-08,
        weight_decay=0.05),
    paramwise_cfg=dict(
        norm_decay_mult=0.0,
        custom_keys={
            'learnable_tokens': dict(lr_mult=1.0, decay_mult=0.0),
            'level_embed': dict(lr_mult=1.0, decay_mult=0.0),
            'norm': dict(decay_mult=0.0),
            'query_embed': dict(lr_mult=1.0, decay_mult=0.0),
            'reins.scale': dict(lr_mult=1.0, decay_mult=0.0),
        }))

# Gradient accumulation over 8 steps (legacy mmcv-style hook config).
optimizer_config = dict(
    type='GradientCumulativeOptimizerHook', cumulative_iters=8)

# Bookkeeping: the reference recipe this run's batch size / LR were scaled from.
original_batch_size = 4
original_lr = 0.0001
original_n_gpus = 8

output_dir = 'StomataPy400K_aperture_512'
# LR schedule: 30-epoch linear warmup (from 0.1% of base LR), then cosine
# decay toward ~0 over epochs 30-300, plus an mIoU-driven ReduceOnPlateauLR.
# NOTE(review): CosineAnnealingLR and ReduceOnPlateauLR are both active over
# the same span, so their effects compose — confirm this stacking is intended.
param_scheduler = [
    dict(
        begin=0,
        by_epoch=True,
        convert_to_iter_based=True,
        end=30,
        end_factor=1.0,
        start_factor=0.001,
        type='LinearLR',
        verbose=False),
    dict(
        T_max=270,
        begin=30,
        by_epoch=True,
        convert_to_iter_based=True,
        end=300,
        eta_min=1.0000000000000002e-10,
        eta_min_ratio=None,
        type='CosineAnnealingLR',
        verbose=False),
    # Multiplies the LR by 0.75 after 50 epochs without mIoU improvement
    # (see ReduceOnPlateauLR_patience at the top of the file).
    dict(
        by_epoch=True,
        factor=0.75,
        monitor='mIoU',
        patience=50,
        rule='greater',
        type='ReduceOnPlateauLR',
        verbose=False),
]
# Fixed seed, but non-deterministic kernels allowed (cudnn_benchmark is on).
randomness = dict(deterministic=False, seed=42)
resume = False
test_cfg = dict(type='ValLoop')

# Deterministic evaluation pipeline. Fix: the original pipeline contained
# dict(prob=0.5, type='RandomFlip') — a stochastic augmentation in the test
# pipeline makes reported metrics non-reproducible between runs, so it has
# been removed. Images are resized (not kept-ratio) to the 512x512 model
# input size; annotations are loaded after resize, matching the original order.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(keep_ratio=False, scale=(512, 512), type='Resize'),
    dict(reduce_zero_label=False, type='LoadAnnotations'),
    dict(type='PackSegInputs'),
]

# Test split reuses the validation list ('splits//val.txt' — double slash kept
# verbatim; harmless on POSIX paths).
test_dataloader = dict(
    batch_size=1,
    num_workers=16,
    dataset=dict(
        type='StomataDataset',
        data_root='StomataPy400K_filtered_train/',
        ann_file='splits//val.txt',
        data_prefix=dict(img_path='images', seg_map_path='labels'),
        pipeline=test_pipeline))

test_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
# Epoch-based training for 300 epochs, validating every epoch.
train_cfg = dict(max_epochs=300, type='EpochBasedTrainLoop', val_interval=1)
# Training dataloader; its pipeline is an inline copy of train_pipeline below.
train_dataloader = dict(
    batch_size=8,
    dataset=dict(
        ann_file='splits//train.txt',
        data_prefix=dict(img_path='images', seg_map_path='labels'),
        data_root='StomataPy400K_filtered_train/',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(reduce_zero_label=False, type='LoadAnnotations'),
            # Scale-jitter the shortest edge over 358-716 px (~0.7x-1.4x of
            # the 512 crop), capped at 2048 on the long side.
            dict(
                max_size=2048,
                resize_type='ResizeShortestEdge',
                scales=[
                    358,
                    409,
                    460,
                    512,
                    563,
                    614,
                    665,
                    716,
                ],
                type='RandomChoiceResize'),
            # Random 512x512 crop; rejects crops where one category exceeds
            # 75% of the pixels.
            dict(
                cat_max_ratio=0.75, crop_size=(
                    512,
                    512,
                ), type='RandomCrop'),
            # Albumentations chain (same as albu_train_transforms at the top
            # of the file), with mmseg keys remapped to albumentations names.
            dict(
                keymap=dict(gt_seg_map='mask', img='image'),
                transforms=[
                    dict(
                        always_apply=True,
                        border_mode=0,
                        min_height=512,
                        min_width=512,
                        type='PadIfNeeded'),
                    dict(always_apply=True, type='Flip'),
                    dict(
                        always_apply=True,
                        interpolation=4,
                        limit=(
                            -180,
                            180,
                        ),
                        type='Rotate'),
                    dict(
                        p=0.5,
                        transforms=[
                            dict(
                                alpha=20,
                                approximate=True,
                                border_mode=0,
                                interpolation=4,
                                mask_value=(
                                    0,
                                    0,
                                    0,
                                ),
                                p=0.5,
                                same_dxdy=True,
                                sigma=15,
                                type='ElasticTransform'),
                            dict(
                                alpha=40,
                                approximate=True,
                                border_mode=0,
                                interpolation=4,
                                mask_value=(
                                    0,
                                    0,
                                    0,
                                ),
                                p=0.5,
                                same_dxdy=False,
                                sigma=15,
                                type='ElasticTransform'),
                        ],
                        type='OneOf'),
                    dict(
                        always_apply=True,
                        brightness=0.2,
                        contrast=0.1,
                        hue=0.2,
                        saturation=0.2,
                        type='ColorJitter'),
                    dict(p=0.5, type='AdvancedBlur'),
                    dict(
                        always_apply=True,
                        height=512,
                        type='CenterCrop',
                        width=512),
                ],
                type='Albu'),
            # Occlusion augmentation: up to 10 holes of 2-5% of image size.
            dict(
                cutout_ratio=(
                    0.02,
                    0.05,
                ),
                n_holes=10,
                prob=0.5,
                type='RandomCutOut'),
            dict(
                interpolation='lanczos',
                keep_ratio=True,
                scale=(
                    512,
                    512,
                ),
                type='Resize'),
            # NOTE(review): meta_keys unusually include the raw 'img' and
            # 'gt_seg_map' arrays — presumably intentional for downstream
            # visualization; confirm.
            dict(
                meta_keys=(
                    'img_path',
                    'img_shape',
                    'img',
                    'gt_seg_map',
                ),
                type='PackSegInputs'),
        ],
        type='StomataDataset'),
    num_workers=16)
# Canonical training pipeline; train_dataloader above carries an inline copy.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(reduce_zero_label=False, type='LoadAnnotations'),
    # Scale-jitter the shortest edge over 358-716 px (~0.7x-1.4x of the 512
    # crop), capped at 2048 on the long side.
    dict(
        max_size=2048,
        resize_type='ResizeShortestEdge',
        scales=[
            358,
            409,
            460,
            512,
            563,
            614,
            665,
            716,
        ],
        type='RandomChoiceResize'),
    # Random 512x512 crop; rejects crops dominated (>75%) by one category.
    dict(cat_max_ratio=0.75, crop_size=(
        512,
        512,
    ), type='RandomCrop'),
    # Albumentations chain (same content as albu_train_transforms above),
    # with mmseg keys remapped to albumentations names.
    dict(
        keymap=dict(gt_seg_map='mask', img='image'),
        transforms=[
            dict(
                always_apply=True,
                border_mode=0,
                min_height=512,
                min_width=512,
                type='PadIfNeeded'),
            dict(always_apply=True, type='Flip'),
            dict(
                always_apply=True,
                interpolation=4,
                limit=(
                    -180,
                    180,
                ),
                type='Rotate'),
            dict(
                p=0.5,
                transforms=[
                    dict(
                        alpha=20,
                        approximate=True,
                        border_mode=0,
                        interpolation=4,
                        mask_value=(
                            0,
                            0,
                            0,
                        ),
                        p=0.5,
                        same_dxdy=True,
                        sigma=15,
                        type='ElasticTransform'),
                    dict(
                        alpha=40,
                        approximate=True,
                        border_mode=0,
                        interpolation=4,
                        mask_value=(
                            0,
                            0,
                            0,
                        ),
                        p=0.5,
                        same_dxdy=False,
                        sigma=15,
                        type='ElasticTransform'),
                ],
                type='OneOf'),
            dict(
                always_apply=True,
                brightness=0.2,
                contrast=0.1,
                hue=0.2,
                saturation=0.2,
                type='ColorJitter'),
            dict(p=0.5, type='AdvancedBlur'),
            dict(always_apply=True, height=512, type='CenterCrop', width=512),
        ],
        type='Albu'),
    # Occlusion augmentation: up to 10 holes of 2-5% of image size.
    dict(
        cutout_ratio=(
            0.02,
            0.05,
        ), n_holes=10, prob=0.5, type='RandomCutOut'),
    dict(
        interpolation='lanczos',
        keep_ratio=True,
        scale=(
            512,
            512,
        ),
        type='Resize'),
    # NOTE(review): meta_keys unusually include the raw 'img' and 'gt_seg_map'
    # arrays — presumably intentional for downstream visualization; confirm.
    dict(
        meta_keys=(
            'img_path',
            'img_shape',
            'img',
            'gt_seg_map',
        ),
        type='PackSegInputs'),
]
tta_model = dict(type='SegTTAModel')
val_cfg = dict(type='ValLoop')

# Validation dataloader. Fix: the original pipeline contained
# dict(prob=0.5, type='RandomFlip') — random augmentation during validation
# makes mIoU (which drives save_best checkpointing, early stopping, and
# ReduceOnPlateauLR) noisy and non-reproducible, so it has been removed.
val_dataloader = dict(
    batch_size=1,
    num_workers=16,
    dataset=dict(
        type='StomataDataset',
        data_root='StomataPy400K_filtered_train/',
        ann_file='splits//val.txt',
        data_prefix=dict(img_path='images', seg_map_path='labels'),
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(keep_ratio=False, scale=(512, 512), type='Resize'),
            dict(reduce_zero_label=False, type='LoadAnnotations'),
            dict(type='PackSegInputs'),
        ]))

val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
val_interval = 1
# Visualization: write locally and mirror to Weights & Biases
# (project 'StomataPy', run name matching the output directory).
visualizer = dict(
    type='SegLocalVisualizer',
    name='visualizer',
    vis_backends=[
        dict(type='LocalVisBackend'),
        dict(
            type='WandbVisBackend',
            init_kwargs=dict(
                project='StomataPy',
                name='StomataPy400K_aperture_512')),
    ])

wandb_project = 'StomataPy'
warmup_epochs = 30
work_dir = '../Models/StomataPy400K_aperture_512'