# faster-rcnn-rope-vit-tiny-coco / faster_rcnn_rope_vit_tiny_coco.py
# aadex's picture
# Upload faster_rcnn_rope_vit_tiny_coco.py with huggingface_hub
# bd47f44 verified
# Learning-rate auto-scaling options: enabled, with a base batch size of 16.
auto_scale_lr = {'base_batch_size': 16, 'enable': True}

# No file-backend arguments are configured.
backend_args = None

# Square 512 x 512 size tuple.
crop_size = (512, 512)

# Extra module(s) to import alongside this config; a failed import is not
# tolerated (allow_failed_imports is False).
custom_imports = {
    'allow_failed_imports': False,
    'imports': ['detection'],
}

# Dataset location and dataset class name.
data_root = 'data/coco/'
dataset_type = 'CocoDataset'
# Hooks keyed by role; each entry selects its hook class through 'type'.
default_hooks = {
    # Checkpointing with interval 1 and save_best set to 'auto'.
    'checkpoint': {
        'interval': 1,
        'save_best': 'auto',
        'type': 'CheckpointHook',
    },
    # Logging with interval 50.
    'logger': {'interval': 50, 'type': 'LoggerHook'},
    'param_scheduler': {'type': 'ParamSchedulerHook'},
    'sampler_seed': {'type': 'DistSamplerSeedHook'},
    'timer': {'type': 'IterTimerHook'},
    'visualization': {'type': 'DetVisualizationHook'},
}
# Registry scope name.
default_scope = 'mmdet'

# Backbone size values; the same numbers appear inside model['backbone'].
depth = 12

# Runtime environment: cuDNN benchmark on, NCCL distributed backend,
# 'fork' multiprocessing start method, OpenCV threads set to 0.
env_cfg = {
    'cudnn_benchmark': True,
    'dist_cfg': {'backend': 'nccl'},
    'mp_cfg': {
        'mp_start_method': 'fork',
        'opencv_num_threads': 0,
    },
}

hidden_dim = 192
img_size = 512

# Launcher is the literal string 'none' (not the None object).
launcher = 'none'

# No checkpoint is loaded for the whole detector.
load_from = None

log_level = 'INFO'

# Epoch-based log processing with a window size of 50.
log_processor = {
    'by_epoch': True,
    'type': 'LogProcessor',
    'window_size': 50,
}

mlp_dim = 768
# Detector definition of type 'FasterRCNN': backbone, data preprocessor,
# neck, RoI head, RPN head, plus train/test settings.
# NOTE(review): 'RoPEViTBackbone', 'SimpleViTFPN' and 'LN2d' are presumably
# provided by the custom 'detection' import - confirm against that package.
model = {
    # Backbone: depth 12, hidden size 192, 3 heads, patch size 16.
    'backbone': {
        'attention_dropout': 0.0,
        'depth': 12,
        'dropout': 0.0,
        'hidden_dim': 192,
        'img_size': 512,
        'in_chans': 3,
        # Weights are initialised from a local pretrained checkpoint.
        'init_cfg': {
            'checkpoint': 'checkpoints/rope_vit_imagenet100_best.pth',
            'type': 'Pretrained',
        },
        'mlp_dim': 768,
        'num_heads': 3,
        'out_indices': (2, 5, 8, 11),
        'patch_size': 16,
        'pretrain_img_size': 224,
        'rope_theta': 10.0,
        'type': 'RoPEViTBackbone',
    },
    # Input normalisation (per-channel mean/std), BGR-to-RGB conversion and
    # a pad-size divisor of 32.
    'data_preprocessor': {
        'bgr_to_rgb': True,
        'mean': [123.675, 116.28, 103.53],
        'pad_size_divisor': 32,
        'std': [58.395, 57.12, 57.375],
        'type': 'DetDataPreprocessor',
    },
    # Neck: 192 backbone channels in, 256 channels out, 5 outputs.
    'neck': {
        'backbone_channel': 192,
        'norm_cfg': {'requires_grad': True, 'type': 'LN2d'},
        'num_outs': 5,
        'out_channels': 256,
        'type': 'SimpleViTFPN',
    },
    # Second stage: box head over 7x7 RoI features, 80 classes.
    'roi_head': {
        'bbox_head': {
            'bbox_coder': {
                'target_means': [0.0, 0.0, 0.0, 0.0],
                'target_stds': [0.1, 0.1, 0.2, 0.2],
                'type': 'DeltaXYWHBBoxCoder',
            },
            'conv_out_channels': 256,
            'fc_out_channels': 1024,
            'in_channels': 256,
            'loss_bbox': {'loss_weight': 1.0, 'type': 'L1Loss'},
            'loss_cls': {
                'loss_weight': 1.0,
                'type': 'CrossEntropyLoss',
                'use_sigmoid': False,
            },
            'norm_cfg': {'requires_grad': True, 'type': 'LN2d'},
            'num_classes': 80,
            'reg_class_agnostic': False,
            'roi_feat_size': 7,
            'type': 'Shared4Conv1FCBBoxHead',
        },
        # RoI features are taken from the four feature maps with strides
        # 4, 8, 16 and 32.
        'bbox_roi_extractor': {
            'featmap_strides': [4, 8, 16, 32],
            'out_channels': 256,
            'roi_layer': {
                'output_size': 7,
                'sampling_ratio': 0,
                'type': 'RoIAlign',
            },
            'type': 'SingleRoIExtractor',
        },
        'type': 'StandardRoIHead',
    },
    # First stage: RPN with anchors on five strides (4 through 64).
    'rpn_head': {
        'anchor_generator': {
            'ratios': [0.5, 1.0, 2.0],
            'scales': [8],
            'strides': [4, 8, 16, 32, 64],
            'type': 'AnchorGenerator',
        },
        'bbox_coder': {
            'target_means': [0.0, 0.0, 0.0, 0.0],
            'target_stds': [1.0, 1.0, 1.0, 1.0],
            'type': 'DeltaXYWHBBoxCoder',
        },
        'feat_channels': 256,
        'in_channels': 256,
        'loss_bbox': {'loss_weight': 1.0, 'type': 'L1Loss'},
        'loss_cls': {
            'loss_weight': 1.0,
            'type': 'CrossEntropyLoss',
            'use_sigmoid': True,
        },
        'num_convs': 2,
        'type': 'RPNHead',
    },
    # Inference-time settings for both stages.
    'test_cfg': {
        'rcnn': {
            'max_per_img': 100,
            'nms': {'iou_threshold': 0.5, 'type': 'nms'},
            'score_thr': 0.05,
        },
        'rpn': {
            'max_per_img': 1000,
            'min_bbox_size': 0,
            'nms': {'iou_threshold': 0.7, 'type': 'nms'},
            'nms_pre': 1000,
        },
    },
    # Training-time assignment, sampling and proposal settings.
    'train_cfg': {
        'rcnn': {
            'assigner': {
                'ignore_iof_thr': -1,
                'match_low_quality': False,
                'min_pos_iou': 0.5,
                'neg_iou_thr': 0.5,
                'pos_iou_thr': 0.5,
                'type': 'MaxIoUAssigner',
            },
            'debug': False,
            'pos_weight': -1,
            'sampler': {
                'add_gt_as_proposals': True,
                'neg_pos_ub': -1,
                'num': 512,
                'pos_fraction': 0.25,
                'type': 'RandomSampler',
            },
        },
        'rpn': {
            'allowed_border': -1,
            'assigner': {
                'ignore_iof_thr': -1,
                'match_low_quality': True,
                'min_pos_iou': 0.3,
                'neg_iou_thr': 0.3,
                'pos_iou_thr': 0.7,
                'type': 'MaxIoUAssigner',
            },
            'debug': False,
            'pos_weight': -1,
            'sampler': {
                'add_gt_as_proposals': False,
                'neg_pos_ub': -1,
                'num': 256,
                'pos_fraction': 0.5,
                'type': 'RandomSampler',
            },
        },
        'rpn_proposal': {
            'max_per_img': 1000,
            'min_bbox_size': 0,
            'nms': {'iou_threshold': 0.7, 'type': 'nms'},
            'nms_pre': 2000,
        },
    },
    'type': 'FasterRCNN',
}
# Class count and attention-head count; the same numbers appear in the
# detector definition.
num_classes = 80
num_heads = 3

# Optimizer wrapper of type 'AmpOptimWrapper' around AdamW.
optim_wrapper = {
    # Gradient clipping: max norm 0.1, norm type 2.
    'clip_grad': {'max_norm': 0.1, 'norm_type': 2},
    'optimizer': {
        'betas': (0.9, 0.999),
        'lr': 0.0001,
        'type': 'AdamW',
        'weight_decay': 0.05,
    },
    # Parameters matched by the 'backbone' key get an lr multiplier of 0.1.
    'paramwise_cfg': {
        'custom_keys': {
            'backbone': {'lr_mult': 0.1},
        },
    },
    'type': 'AmpOptimWrapper',
}
# Two learning-rate schedulers, in order:
#   1. iteration-based LinearLR over [0, 500) starting at factor 0.001;
#   2. epoch-based MultiStepLR over 12 epochs, gamma 0.1 at epochs 8 and 11.
param_scheduler = [
    {
        'begin': 0,
        'by_epoch': False,
        'end': 500,
        'start_factor': 0.001,
        'type': 'LinearLR',
    },
    {
        'begin': 0,
        'by_epoch': True,
        'end': 12,
        'gamma': 0.1,
        'milestones': [8, 11],
        'type': 'MultiStepLR',
    },
]

patch_size = 16

# Resuming is switched off.
resume = False

# Test loop type.
test_cfg = {'type': 'TestLoop'}
# Test data loader: batch size 1, 2 persistent workers, no shuffling,
# last batch kept.
test_dataloader = {
    'batch_size': 1,
    'dataset': {
        'ann_file': 'annotations/instances_val2017.json',
        'backend_args': None,
        'data_prefix': {'img': 'val2017/'},
        'data_root': 'data/coco/',
        # Load -> aspect-preserving resize to 512x512 -> pad to 512x512
        # with value 114 -> load boxes -> pack with the listed meta keys.
        'pipeline': [
            {'backend_args': None, 'type': 'LoadImageFromFile'},
            {'keep_ratio': True, 'scale': (512, 512), 'type': 'Resize'},
            {
                'pad_val': {'img': (114, 114, 114)},
                'size': (512, 512),
                'type': 'Pad',
            },
            {'type': 'LoadAnnotations', 'with_bbox': True},
            {
                'meta_keys': (
                    'img_id',
                    'img_path',
                    'ori_shape',
                    'img_shape',
                    'scale_factor',
                ),
                'type': 'PackDetInputs',
            },
        ],
        'test_mode': True,
        'type': 'CocoDataset',
    },
    'drop_last': False,
    'num_workers': 2,
    'persistent_workers': True,
    'sampler': {'shuffle': False, 'type': 'DefaultSampler'},
}
# Test-time evaluator: 'bbox' metric computed against the val2017
# annotation file; results are evaluated, not only formatted.
test_evaluator = {
    'ann_file': 'data/coco/annotations/instances_val2017.json',
    'backend_args': None,
    'format_only': False,
    'metric': 'bbox',
    'type': 'CocoMetric',
}
# Test-time transform sequence: load image, aspect-preserving resize to
# 512x512, pad to 512x512 with value 114, load boxes, pack inputs.
test_pipeline = [
    {'backend_args': None, 'type': 'LoadImageFromFile'},
    {'keep_ratio': True, 'scale': (512, 512), 'type': 'Resize'},
    {
        'pad_val': {'img': (114, 114, 114)},
        'size': (512, 512),
        'type': 'Pad',
    },
    {'type': 'LoadAnnotations', 'with_bbox': True},
    {
        # Metadata entries carried along with each packed sample.
        'meta_keys': (
            'img_id',
            'img_path',
            'ori_shape',
            'img_shape',
            'scale_factor',
        ),
        'type': 'PackDetInputs',
    },
]
# Epoch-based training loop: 12 epochs, validation interval 1.
train_cfg = {
    'max_epochs': 12,
    'type': 'EpochBasedTrainLoop',
    'val_interval': 1,
}

# Training data loader: batch size 16, 8 persistent workers, shuffled,
# batches formed by 'AspectRatioBatchSampler'.
train_dataloader = {
    'batch_sampler': {'type': 'AspectRatioBatchSampler'},
    'batch_size': 16,
    'dataset': {
        'ann_file': 'annotations/instances_train2017.json',
        'backend_args': None,
        'data_prefix': {'img': 'train2017/'},
        'data_root': 'data/coco/',
        # Filter settings: filter_empty_gt on, min_size 32.
        'filter_cfg': {'filter_empty_gt': True, 'min_size': 32},
        # Load image and boxes -> aspect-preserving resize to 512x512 ->
        # random flip (p=0.5) -> pad to 512x512 with value 114 -> pack.
        'pipeline': [
            {'backend_args': None, 'type': 'LoadImageFromFile'},
            {'type': 'LoadAnnotations', 'with_bbox': True},
            {'keep_ratio': True, 'scale': (512, 512), 'type': 'Resize'},
            {'prob': 0.5, 'type': 'RandomFlip'},
            {
                'pad_val': {'img': (114, 114, 114)},
                'size': (512, 512),
                'type': 'Pad',
            },
            {'type': 'PackDetInputs'},
        ],
        'type': 'CocoDataset',
    },
    'num_workers': 8,
    'persistent_workers': True,
    'sampler': {'shuffle': True, 'type': 'DefaultSampler'},
}
# Training transform sequence: load image and boxes, aspect-preserving
# resize to 512x512, random flip (p=0.5), pad to 512x512 with value 114,
# pack inputs.
train_pipeline = [
    {'backend_args': None, 'type': 'LoadImageFromFile'},
    {'type': 'LoadAnnotations', 'with_bbox': True},
    {'keep_ratio': True, 'scale': (512, 512), 'type': 'Resize'},
    {'prob': 0.5, 'type': 'RandomFlip'},
    {
        'pad_val': {'img': (114, 114, 114)},
        'size': (512, 512),
        'type': 'Pad',
    },
    {'type': 'PackDetInputs'},
]
# Validation loop type.
val_cfg = {'type': 'ValLoop'}

# Validation data loader: batch size 1, 2 persistent workers, no
# shuffling, last batch kept.
val_dataloader = {
    'batch_size': 1,
    'dataset': {
        'ann_file': 'annotations/instances_val2017.json',
        'backend_args': None,
        'data_prefix': {'img': 'val2017/'},
        'data_root': 'data/coco/',
        # Load -> aspect-preserving resize to 512x512 -> pad to 512x512
        # with value 114 -> load boxes -> pack with the listed meta keys.
        'pipeline': [
            {'backend_args': None, 'type': 'LoadImageFromFile'},
            {'keep_ratio': True, 'scale': (512, 512), 'type': 'Resize'},
            {
                'pad_val': {'img': (114, 114, 114)},
                'size': (512, 512),
                'type': 'Pad',
            },
            {'type': 'LoadAnnotations', 'with_bbox': True},
            {
                'meta_keys': (
                    'img_id',
                    'img_path',
                    'ori_shape',
                    'img_shape',
                    'scale_factor',
                ),
                'type': 'PackDetInputs',
            },
        ],
        'test_mode': True,
        'type': 'CocoDataset',
    },
    'drop_last': False,
    'num_workers': 2,
    'persistent_workers': True,
    'sampler': {'shuffle': False, 'type': 'DefaultSampler'},
}
# Validation evaluator: 'bbox' metric computed against the val2017
# annotation file; results are evaluated, not only formatted.
val_evaluator = {
    'ann_file': 'data/coco/annotations/instances_val2017.json',
    'backend_args': None,
    'format_only': False,
    'metric': 'bbox',
    'type': 'CocoMetric',
}
# Visualisation backends: a local backend plus a Weights & Biases backend
# whose init kwargs carry the run name, project and tags.
vis_backends = [
    {'type': 'LocalVisBackend'},
    {
        'init_kwargs': {
            'name': 'faster_rcnn_rope_vit_tiny_coco_512',
            'project': 'vit-detection',
            'tags': ['rope_vit', 'coco', 'faster_rcnn', 'extrapolation'],
        },
        'type': 'WandbVisBackend',
    },
]
# Visualizer of type 'DetLocalVisualizer' with a local backend and a
# Weights & Biases backend (run name, project and tags in init kwargs).
visualizer = {
    'name': 'visualizer',
    'type': 'DetLocalVisualizer',
    'vis_backends': [
        {'type': 'LocalVisBackend'},
        {
            'init_kwargs': {
                'name': 'faster_rcnn_rope_vit_tiny_coco_512',
                'project': 'vit-detection',
                'tags': [
                    'rope_vit',
                    'coco',
                    'faster_rcnn',
                    'extrapolation',
                ],
            },
            'type': 'WandbVisBackend',
        },
    ],
}

# Working directory path for this run.
work_dir = './work_dirs/faster_rcnn_rope_vit_tiny_coco'