add general
Browse files- ckpts/ViTP_InternVL_1B_general.safetensors +3 -0
- ckpts/ViTP_ViT_L_300M_general.safetensors +3 -0
- ckpts/vitp_ade20k_upernet_5575/20251020_221526.log +0 -0
- ckpts/vitp_ade20k_upernet_5575/iter_160000.pth +3 -0
- ckpts/vitp_ade20k_upernet_5575/vitp_ade20k_upernet.py +202 -0
- ckpts/vitp_coco_maskrcnn_539/20251025_101330.log +0 -0
- ckpts/vitp_coco_maskrcnn_539/20251025_101330.log.json +181 -0
- ckpts/vitp_coco_maskrcnn_539/epoch_12.pth +3 -0
- ckpts/vitp_coco_maskrcnn_539/vitp_coco_maskrcnn_bs8_lr3e-5_dpr03_ld_60.py +357 -0
ckpts/ViTP_InternVL_1B_general.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f585a65053365a5107ce59d67b64071d80ed62f5264ccf6e852bc7374c41a84a
|
| 3 |
+
size 1876463472
|
ckpts/ViTP_ViT_L_300M_general.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:13ae8c2cda653e1ca0a8f7b766ebe3a9a0d20f40bb9c6b1af61b9e6b2a157a89
|
| 3 |
+
size 617029872
|
ckpts/vitp_ade20k_upernet_5575/20251020_221526.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
ckpts/vitp_ade20k_upernet_5575/iter_160000.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:469aaf5b60aca70f34e85f3caceca839ed5a348258431e60462e334a4708fac1
|
| 3 |
+
size 1499311765
|
ckpts/vitp_ade20k_upernet_5575/vitp_ade20k_upernet.py
ADDED
|
@@ -0,0 +1,202 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_type = 'ADE20KDataset'
|
| 2 |
+
data_root = '/home/share/seg_datasets/ade/ADEChallengeData2016'
|
| 3 |
+
img_norm_cfg = dict(
|
| 4 |
+
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
| 5 |
+
crop_size = (512, 512)
|
| 6 |
+
train_pipeline = [
|
| 7 |
+
dict(type='LoadImageFromFile'),
|
| 8 |
+
dict(type='LoadAnnotations', reduce_zero_label=True),
|
| 9 |
+
dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
|
| 10 |
+
dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
|
| 11 |
+
dict(type='RandomFlip', prob=0.5),
|
| 12 |
+
dict(type='PhotoMetricDistortion'),
|
| 13 |
+
dict(
|
| 14 |
+
type='Normalize',
|
| 15 |
+
mean=[123.675, 116.28, 103.53],
|
| 16 |
+
std=[58.395, 57.12, 57.375],
|
| 17 |
+
to_rgb=True),
|
| 18 |
+
dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255),
|
| 19 |
+
dict(type='DefaultFormatBundle'),
|
| 20 |
+
dict(type='Collect', keys=['img', 'gt_semantic_seg'])
|
| 21 |
+
]
|
| 22 |
+
test_pipeline = [
|
| 23 |
+
dict(type='LoadImageFromFile'),
|
| 24 |
+
dict(
|
| 25 |
+
type='MultiScaleFlipAug',
|
| 26 |
+
img_scale=(2048, 512),
|
| 27 |
+
flip=False,
|
| 28 |
+
transforms=[
|
| 29 |
+
dict(type='Resize', keep_ratio=True),
|
| 30 |
+
dict(type='RandomFlip'),
|
| 31 |
+
dict(
|
| 32 |
+
type='Normalize',
|
| 33 |
+
mean=[123.675, 116.28, 103.53],
|
| 34 |
+
std=[58.395, 57.12, 57.375],
|
| 35 |
+
to_rgb=True),
|
| 36 |
+
dict(type='ImageToTensor', keys=['img']),
|
| 37 |
+
dict(type='Collect', keys=['img'])
|
| 38 |
+
])
|
| 39 |
+
]
|
| 40 |
+
data = dict(
|
| 41 |
+
samples_per_gpu=2,
|
| 42 |
+
workers_per_gpu=4,
|
| 43 |
+
train=dict(
|
| 44 |
+
type='ADE20KDataset',
|
| 45 |
+
data_root='/home/share/seg_datasets/ade/ADEChallengeData2016',
|
| 46 |
+
img_dir='images/training',
|
| 47 |
+
ann_dir='annotations/training',
|
| 48 |
+
pipeline=[
|
| 49 |
+
dict(type='LoadImageFromFile'),
|
| 50 |
+
dict(type='LoadAnnotations', reduce_zero_label=True),
|
| 51 |
+
dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
|
| 52 |
+
dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
|
| 53 |
+
dict(type='RandomFlip', prob=0.5),
|
| 54 |
+
dict(type='PhotoMetricDistortion'),
|
| 55 |
+
dict(
|
| 56 |
+
type='Normalize',
|
| 57 |
+
mean=[123.675, 116.28, 103.53],
|
| 58 |
+
std=[58.395, 57.12, 57.375],
|
| 59 |
+
to_rgb=True),
|
| 60 |
+
dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255),
|
| 61 |
+
dict(type='DefaultFormatBundle'),
|
| 62 |
+
dict(type='Collect', keys=['img', 'gt_semantic_seg'])
|
| 63 |
+
]),
|
| 64 |
+
val=dict(
|
| 65 |
+
type='ADE20KDataset',
|
| 66 |
+
data_root='/home/share/seg_datasets/ade/ADEChallengeData2016',
|
| 67 |
+
img_dir='images/validation',
|
| 68 |
+
ann_dir='annotations/validation',
|
| 69 |
+
pipeline=[
|
| 70 |
+
dict(type='LoadImageFromFile'),
|
| 71 |
+
dict(
|
| 72 |
+
type='MultiScaleFlipAug',
|
| 73 |
+
img_scale=(2048, 512),
|
| 74 |
+
flip=False,
|
| 75 |
+
transforms=[
|
| 76 |
+
dict(type='Resize', keep_ratio=True),
|
| 77 |
+
dict(type='RandomFlip'),
|
| 78 |
+
dict(
|
| 79 |
+
type='Normalize',
|
| 80 |
+
mean=[123.675, 116.28, 103.53],
|
| 81 |
+
std=[58.395, 57.12, 57.375],
|
| 82 |
+
to_rgb=True),
|
| 83 |
+
dict(type='ImageToTensor', keys=['img']),
|
| 84 |
+
dict(type='Collect', keys=['img'])
|
| 85 |
+
])
|
| 86 |
+
]),
|
| 87 |
+
test=dict(
|
| 88 |
+
type='ADE20KDataset',
|
| 89 |
+
data_root='/home/share/seg_datasets/ade/ADEChallengeData2016',
|
| 90 |
+
img_dir='images/validation',
|
| 91 |
+
ann_dir='annotations/validation',
|
| 92 |
+
pipeline=[
|
| 93 |
+
dict(type='LoadImageFromFile'),
|
| 94 |
+
dict(
|
| 95 |
+
type='MultiScaleFlipAug',
|
| 96 |
+
img_scale=(2048, 512),
|
| 97 |
+
flip=False,
|
| 98 |
+
transforms=[
|
| 99 |
+
dict(type='Resize', keep_ratio=True),
|
| 100 |
+
dict(type='RandomFlip'),
|
| 101 |
+
dict(
|
| 102 |
+
type='Normalize',
|
| 103 |
+
mean=[123.675, 116.28, 103.53],
|
| 104 |
+
std=[58.395, 57.12, 57.375],
|
| 105 |
+
to_rgb=True),
|
| 106 |
+
dict(type='ImageToTensor', keys=['img']),
|
| 107 |
+
dict(type='Collect', keys=['img'])
|
| 108 |
+
])
|
| 109 |
+
]))
|
| 110 |
+
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
| 111 |
+
model = dict(
|
| 112 |
+
type='EncoderDecoder',
|
| 113 |
+
pretrained=None,
|
| 114 |
+
backbone=dict(
|
| 115 |
+
type='InternViTAdapter',
|
| 116 |
+
pretrain_size=448,
|
| 117 |
+
img_size=512,
|
| 118 |
+
patch_size=16,
|
| 119 |
+
embed_dim=1024,
|
| 120 |
+
depth=24,
|
| 121 |
+
num_heads=16,
|
| 122 |
+
mlp_ratio=4.0,
|
| 123 |
+
drop_path_rate=0.15,
|
| 124 |
+
init_values=1e-05,
|
| 125 |
+
with_cp=True,
|
| 126 |
+
use_flash_attn=False,
|
| 127 |
+
qk_normalization=False,
|
| 128 |
+
layerscale_force_fp32=False,
|
| 129 |
+
with_fpn=False,
|
| 130 |
+
freeze_vit=False,
|
| 131 |
+
use_final_norm=True,
|
| 132 |
+
interaction_indexes=[[0, 7], [8, 11], [12, 15], [16, 23]],
|
| 133 |
+
cffn_ratio=0.25,
|
| 134 |
+
deform_ratio=0.25,
|
| 135 |
+
qkv_bias=True,
|
| 136 |
+
norm_type='layer_norm',
|
| 137 |
+
pretrained=
|
| 138 |
+
'/home/u1120230285/lyx/InternVL/internvl_chat/work_dirs/ft_full_1b_16ksteps_instruct_tuning_as_pretrain_TMAug75_general/ViTP_general_16k/ViTP_general_16k.safetensors',
|
| 139 |
+
pretrained_type='full'),
|
| 140 |
+
decode_head=dict(
|
| 141 |
+
type='UPerHead',
|
| 142 |
+
in_channels=[1024, 1024, 1024, 1024],
|
| 143 |
+
in_index=[0, 1, 2, 3],
|
| 144 |
+
pool_scales=(1, 2, 3, 6),
|
| 145 |
+
channels=512,
|
| 146 |
+
dropout_ratio=0.1,
|
| 147 |
+
num_classes=150,
|
| 148 |
+
norm_cfg=dict(type='SyncBN', requires_grad=True),
|
| 149 |
+
align_corners=False,
|
| 150 |
+
loss_decode=dict(
|
| 151 |
+
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
| 152 |
+
auxiliary_head=dict(
|
| 153 |
+
type='FCNHead',
|
| 154 |
+
in_channels=1024,
|
| 155 |
+
in_index=2,
|
| 156 |
+
channels=256,
|
| 157 |
+
num_convs=1,
|
| 158 |
+
concat_input=False,
|
| 159 |
+
dropout_ratio=0.1,
|
| 160 |
+
num_classes=150,
|
| 161 |
+
norm_cfg=dict(type='SyncBN', requires_grad=True),
|
| 162 |
+
align_corners=False,
|
| 163 |
+
loss_decode=dict(
|
| 164 |
+
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
| 165 |
+
train_cfg=dict(),
|
| 166 |
+
test_cfg=dict(mode='slide', crop_size=(512, 512), stride=(341, 341)))
|
| 167 |
+
log_config = dict(
|
| 168 |
+
interval=1000,
|
| 169 |
+
hooks=[
|
| 170 |
+
dict(type='TextLoggerHook', by_epoch=False),
|
| 171 |
+
dict(type='TensorboardLoggerHook')
|
| 172 |
+
])
|
| 173 |
+
dist_params = dict(backend='nccl')
|
| 174 |
+
log_level = 'INFO'
|
| 175 |
+
load_from = None
|
| 176 |
+
resume_from = None
|
| 177 |
+
workflow = [('train', 1)]
|
| 178 |
+
cudnn_benchmark = True
|
| 179 |
+
optimizer = dict(
|
| 180 |
+
type='AdamW',
|
| 181 |
+
lr=1e-05,
|
| 182 |
+
betas=(0.9, 0.999),
|
| 183 |
+
weight_decay=0.1,
|
| 184 |
+
constructor='InternViTAdapterLayerDecayOptimizerConstructor',
|
| 185 |
+
paramwise_cfg=dict(num_layers=24, layer_decay_rate=0.95))
|
| 186 |
+
optimizer_config = dict()
|
| 187 |
+
lr_config = dict(
|
| 188 |
+
policy='poly',
|
| 189 |
+
warmup='linear',
|
| 190 |
+
warmup_iters=1500,
|
| 191 |
+
warmup_ratio=1e-06,
|
| 192 |
+
power=1.0,
|
| 193 |
+
min_lr=0.0,
|
| 194 |
+
by_epoch=False)
|
| 195 |
+
runner = dict(type='IterBasedRunner', max_iters=160000)
|
| 196 |
+
checkpoint_config = dict(by_epoch=False, interval=8000, max_keep_ckpts=3)
|
| 197 |
+
evaluation = dict(interval=8000, metric='mIoU', pre_eval=True)
|
| 198 |
+
pretrained = '/home/u1120230285/lyx/InternVL/internvl_chat/work_dirs/ft_full_1b_16ksteps_instruct_tuning_as_pretrain_TMAug75_general/ViTP_general_16k/ViTP_general_16k.safetensors'
|
| 199 |
+
fp16 = None
|
| 200 |
+
work_dir = './work_dirs/vitp_ade20k_upernet_dp15'
|
| 201 |
+
gpu_ids = range(0, 8)
|
| 202 |
+
auto_resume = False
|
ckpts/vitp_coco_maskrcnn_539/20251025_101330.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
ckpts/vitp_coco_maskrcnn_539/20251025_101330.log.json
ADDED
|
@@ -0,0 +1,181 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"env_info": "sys.platform: linux\nPython: 3.9.23 | packaged by conda-forge | (main, Jun 4 2025, 17:57:12) [GCC 13.3.0]\nCUDA available: True\nGPU 0,1,2,3,4,5,6,7: NVIDIA A40\nCUDA_HOME: /usr/local/cuda\nNVCC: Cuda compilation tools, release 12.2, V12.2.91\nGCC: gcc (conda-forge gcc 15.1.0-4) 15.1.0\nPyTorch: 1.13.1+cu116\nPyTorch compiling details: PyTorch built with:\n - GCC 9.3\n - C++ Version: 201402\n - Intel(R) Math Kernel Library Version 2020.0.0 Product Build 20191122 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v2.6.0 (Git Hash 52b5f107dd9cf10910aaa19cb47f3abf9b349815)\n - OpenMP 201511 (a.k.a. OpenMP 4.5)\n - LAPACK is enabled (usually provided by MKL)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 11.6\n - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86\n - CuDNN 8.3.2 (built against CUDA 11.5)\n - Magma 2.6.1\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.6, CUDNN_VERSION=8.3.2, CXX_COMPILER=/opt/rh/devtoolset-9/root/usr/bin/c++, CXX_FLAGS= -fabi-version=11 -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -DEDGE_PROFILER_USE_KINETO -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wunused-local-typedefs -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.13.1, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, \n\nTorchVision: 0.14.1+cu116\nOpenCV: 4.5.4\nMMCV: 1.7.0\nMMCV Compiler: GCC 9.3\nMMCV CUDA Compiler: 11.6\nMMRotate: 0.3.4+7b4764d", "config": "dataset_type = 'CocoDataset'\ndata_root = '/home/share/coco/'\nimg_norm_cfg = dict(\n mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n dict(type='LoadImageFromFile'),\n dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n dict(type='RandomFlip', flip_ratio=0.5),\n dict(\n type='AutoAugment',\n policies=[[{\n 'type':\n 'Resize',\n 'img_scale': [(480, 1333), (512, 1333), (544, 1333), (576, 1333),\n (608, 1333), (640, 1333), (672, 1333), (704, 1333),\n (736, 1333), (768, 1333), (800, 1333)],\n 'multiscale_mode':\n 'value',\n 'keep_ratio':\n True\n }],\n [{\n 'type': 'Resize',\n 'img_scale': [(400, 1333), (500, 1333), (600, 1333)],\n 'multiscale_mode': 'value',\n 'keep_ratio': True\n }, {\n 'type': 'RandomCrop',\n 'crop_type': 'absolute_range',\n 'crop_size': (384, 600),\n 'allow_negative_crop': True\n }, {\n 'type':\n 'Resize',\n 'img_scale': [(480, 1333), (512, 1333), (544, 1333),\n (576, 1333), (608, 1333), (640, 1333),\n (672, 1333), (704, 1333), (736, 1333),\n (768, 1333), (800, 1333)],\n 'multiscale_mode':\n 'value',\n 'override':\n True,\n 'keep_ratio':\n True\n }]]),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size_divisor=32),\n dict(type='DefaultFormatBundle'),\n dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks'])\n]\ntest_pipeline = [\n dict(type='LoadImageFromFile'),\n dict(\n type='MultiScaleFlipAug',\n img_scale=(1333, 800),\n flip=False,\n transforms=[\n dict(type='Resize', keep_ratio=True),\n dict(type='RandomFlip'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size_divisor=32),\n dict(type='ImageToTensor', keys=['img']),\n dict(type='Collect', keys=['img'])\n ])\n]\ndata = dict(\n samples_per_gpu=1,\n workers_per_gpu=1,\n train=dict(\n type='CocoDataset',\n ann_file='/home/share/coco/annotations/instances_train2017.json',\n img_prefix='/home/share/coco/train2017/',\n pipeline=[\n dict(type='LoadImageFromFile'),\n dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n dict(type='RandomFlip', flip_ratio=0.5),\n dict(\n type='AutoAugment',\n policies=[[{\n 'type':\n 'Resize',\n 'img_scale': [(480, 1333), (512, 1333), (544, 1333),\n (576, 1333), (608, 1333), (640, 1333),\n (672, 1333), (704, 1333), (736, 1333),\n (768, 1333), (800, 1333)],\n 'multiscale_mode':\n 'value',\n 'keep_ratio':\n True\n }],\n [{\n 'type': 'Resize',\n 'img_scale': [(400, 1333), (500, 1333),\n (600, 1333)],\n 'multiscale_mode': 'value',\n 'keep_ratio': True\n }, {\n 'type': 'RandomCrop',\n 'crop_type': 'absolute_range',\n 'crop_size': (384, 600),\n 'allow_negative_crop': True\n }, {\n 'type':\n 'Resize',\n 'img_scale': [(480, 1333), (512, 1333),\n (544, 1333), (576, 1333),\n (608, 1333), (640, 1333),\n (672, 1333), (704, 1333),\n (736, 1333), (768, 1333),\n (800, 1333)],\n 'multiscale_mode':\n 'value',\n 'override':\n True,\n 'keep_ratio':\n True\n }]]),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size_divisor=32),\n dict(type='DefaultFormatBundle'),\n dict(\n type='Collect',\n keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks'])\n ]),\n val=dict(\n type='CocoDataset',\n ann_file='/home/share/coco/annotations/instances_val2017.json',\n img_prefix='/home/share/coco/val2017/',\n pipeline=[\n dict(type='LoadImageFromFile'),\n dict(\n type='MultiScaleFlipAug',\n img_scale=(1333, 800),\n flip=False,\n transforms=[\n dict(type='Resize', keep_ratio=True),\n dict(type='RandomFlip'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size_divisor=32),\n dict(type='ImageToTensor', keys=['img']),\n dict(type='Collect', keys=['img'])\n ])\n ]),\n test=dict(\n type='CocoDataset',\n ann_file='/home/share/coco/annotations/instances_val2017.json',\n img_prefix='/home/share/coco/val2017/',\n pipeline=[\n dict(type='LoadImageFromFile'),\n dict(\n type='MultiScaleFlipAug',\n img_scale=(1333, 800),\n flip=False,\n transforms=[\n dict(type='Resize', keep_ratio=True),\n dict(type='RandomFlip'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size_divisor=32),\n dict(type='ImageToTensor', keys=['img']),\n dict(type='Collect', keys=['img'])\n ])\n ]))\nevaluation = dict(metric=['bbox', 'segm'], interval=1, classwise=True)\nmodel = dict(\n type='MaskRCNN',\n backbone=dict(\n type='InternViTAdapter',\n pretrain_size=448,\n img_size=(448, 448),\n patch_size=16,\n embed_dim=1024,\n depth=24,\n num_heads=16,\n mlp_ratio=4.0,\n drop_path_rate=0.2,\n init_values=1e-05,\n with_cp=True,\n use_flash_attn=False,\n qk_normalization=False,\n layerscale_force_fp32=False,\n with_fpn=False,\n freeze_vit=False,\n use_final_norm=True,\n interaction_indexes=[[0, 7], [8, 11], [12, 15], [16, 23]],\n cffn_ratio=0.25,\n deform_ratio=0.25,\n qkv_bias=True,\n norm_type='layer_norm',\n pretrained=\n '/home/u1120230285/lyx/InternVL/internvl_chat/work_dirs/ft_full_1b_16ksteps_instruct_tuning_as_pretrain_TMAug75_general/ViTP_general_16k/ViTP_general_16k.safetensors',\n pretrained_type='full',\n only_feat_out=True),\n neck=dict(\n type='FPN',\n in_channels=[1024, 1024, 1024, 1024],\n out_channels=256,\n num_outs=5),\n rpn_head=dict(\n type='RPNHead',\n in_channels=256,\n feat_channels=256,\n anchor_generator=dict(\n type='AnchorGenerator',\n scales=[8],\n ratios=[0.5, 1.0, 2.0],\n strides=[4, 8, 16, 32, 64]),\n bbox_coder=dict(\n type='DeltaXYWHBBoxCoder',\n target_means=[0.0, 0.0, 0.0, 0.0],\n target_stds=[1.0, 1.0, 1.0, 1.0]),\n loss_cls=dict(\n type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\n loss_bbox=dict(type='L1Loss', loss_weight=1.0)),\n roi_head=dict(\n type='StandardRoIHead',\n bbox_roi_extractor=dict(\n type='SingleRoIExtractor',\n roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),\n out_channels=256,\n featmap_strides=[4, 8, 16, 32]),\n bbox_head=dict(\n type='ConvFCBBoxHead',\n num_shared_convs=4,\n num_shared_fcs=1,\n in_channels=256,\n conv_out_channels=256,\n fc_out_channels=1024,\n roi_feat_size=7,\n num_classes=80,\n bbox_coder=dict(\n type='DeltaXYWHBBoxCoder',\n target_means=[0.0, 0.0, 0.0, 0.0],\n target_stds=[0.1, 0.1, 0.2, 0.2]),\n reg_class_agnostic=False,\n reg_decoded_bbox=True,\n norm_cfg=dict(type='SyncBN', requires_grad=True),\n loss_cls=dict(\n type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),\n loss_bbox=dict(type='GIoULoss', loss_weight=10.0)),\n mask_roi_extractor=dict(\n type='SingleRoIExtractor',\n roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),\n out_channels=256,\n featmap_strides=[4, 8, 16, 32]),\n mask_head=dict(\n type='FCNMaskHead',\n num_convs=4,\n in_channels=256,\n conv_out_channels=256,\n num_classes=80,\n loss_mask=dict(\n type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),\n train_cfg=dict(\n rpn=dict(\n assigner=dict(\n type='MaxIoUAssigner',\n pos_iou_thr=0.7,\n neg_iou_thr=0.3,\n min_pos_iou=0.3,\n match_low_quality=True,\n gpu_assign_thr=300,\n ignore_iof_thr=-1),\n sampler=dict(\n type='RandomSampler',\n num=256,\n pos_fraction=0.5,\n neg_pos_ub=-1,\n add_gt_as_proposals=False),\n allowed_border=-1,\n pos_weight=-1,\n debug=False),\n rpn_proposal=dict(\n nms_pre=2000,\n max_per_img=1000,\n nms=dict(type='nms', iou_threshold=0.7),\n min_bbox_size=0),\n rcnn=dict(\n assigner=dict(\n type='MaxIoUAssigner',\n pos_iou_thr=0.5,\n neg_iou_thr=0.5,\n min_pos_iou=0.5,\n match_low_quality=True,\n gpu_assign_thr=300,\n ignore_iof_thr=-1),\n sampler=dict(\n type='RandomSampler',\n num=512,\n pos_fraction=0.25,\n neg_pos_ub=-1,\n add_gt_as_proposals=True),\n mask_size=28,\n pos_weight=-1,\n debug=False)),\n test_cfg=dict(\n rpn=dict(\n nms_across_levels=False,\n nms_pre=1000,\n max_per_img=1000,\n nms_post=1000,\n nms=dict(type='nms', iou_threshold=0.7),\n min_bbox_size=0),\n rcnn=dict(\n score_thr=0.05,\n nms=dict(type='nms', iou_threshold=0.5),\n max_per_img=100,\n mask_thr_binary=0.5)))\noptimizer_config = dict(grad_clip=None)\nrunner = dict(type='EpochBasedRunner', max_epochs=12)\noptimizer = dict(\n type='AdamW',\n lr=3e-05,\n betas=(0.9, 0.999),\n weight_decay=0.05,\n constructor='InternViTAdapterLayerDecayOptimizerConstructor',\n paramwise_cfg=dict(num_layers=24, layer_decay_rate=0.85))\nlr_config = dict(\n policy='step',\n warmup='linear',\n warmup_iters=500,\n warmup_ratio=0.001,\n step=[8, 11])\ncheckpoint_config = dict(interval=1)\nlog_config = dict(interval=1000, hooks=[dict(type='TextLoggerHook')])\ncustom_hooks = [dict(type='NumClassCheckHook')]\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\nopencv_num_threads = 0\nmp_start_method = 'fork'\nauto_scale_lr = dict(enable=False, base_batch_size=16)\nauto_resume = False\ngpu_ids = range(0, 8)\ndevice = 'cuda'\nwork_dir = './work_dirs/vitp_coco_maskrcnn_bs8_lr3e-5_dpr03_ld_60'\n", "seed": 1101731743, "exp_name": "vitp_coco_maskrcnn_bs8_lr3e-5_dpr03_ld_60.py"}
|
| 2 |
+
{"mode": "train", "epoch": 1, "iter": 1000, "lr": 0.0, "memory": 16231, "data_time": 0.0144, "loss_rpn_cls": 0.17405, "loss_rpn_bbox": 0.07422, "loss_cls": 0.67155, "acc": 89.49653, "loss_bbox": 0.29963, "loss_mask": 0.61383, "loss": 1.83329, "time": 1.80288}
|
| 3 |
+
{"mode": "train", "epoch": 1, "iter": 2000, "lr": 0.0, "memory": 16609, "data_time": 0.01088, "loss_rpn_cls": 0.06649, "loss_rpn_bbox": 0.0641, "loss_cls": 0.38962, "acc": 89.74343, "loss_bbox": 0.404, "loss_mask": 0.39441, "loss": 1.31863, "time": 1.78144}
|
| 4 |
+
{"mode": "train", "epoch": 1, "iter": 3000, "lr": 0.0, "memory": 16613, "data_time": 0.01054, "loss_rpn_cls": 0.05607, "loss_rpn_bbox": 0.05945, "loss_cls": 0.32523, "acc": 90.07888, "loss_bbox": 0.39117, "loss_mask": 0.35546, "loss": 1.18738, "time": 1.81229}
|
| 5 |
+
{"mode": "train", "epoch": 1, "iter": 4000, "lr": 0.0, "memory": 16613, "data_time": 0.01072, "loss_rpn_cls": 0.04893, "loss_rpn_bbox": 0.05537, "loss_cls": 0.29416, "acc": 90.697, "loss_bbox": 0.36326, "loss_mask": 0.33051, "loss": 1.09222, "time": 1.8139}
|
| 6 |
+
{"mode": "train", "epoch": 1, "iter": 5000, "lr": 0.0, "memory": 16613, "data_time": 0.01065, "loss_rpn_cls": 0.04702, "loss_rpn_bbox": 0.05388, "loss_cls": 0.28195, "acc": 90.87158, "loss_bbox": 0.35129, "loss_mask": 0.31703, "loss": 1.05116, "time": 1.79733}
|
| 7 |
+
{"mode": "train", "epoch": 1, "iter": 6000, "lr": 0.0, "memory": 16613, "data_time": 0.0105, "loss_rpn_cls": 0.0461, "loss_rpn_bbox": 0.05216, "loss_cls": 0.27272, "acc": 91.13093, "loss_bbox": 0.34021, "loss_mask": 0.30441, "loss": 1.0156, "time": 1.80991}
|
| 8 |
+
{"mode": "train", "epoch": 1, "iter": 7000, "lr": 0.0, "memory": 16613, "data_time": 0.0103, "loss_rpn_cls": 0.04452, "loss_rpn_bbox": 0.04928, "loss_cls": 0.26036, "acc": 91.38269, "loss_bbox": 0.3285, "loss_mask": 0.29308, "loss": 0.97575, "time": 1.80047}
|
| 9 |
+
{"mode": "train", "epoch": 1, "iter": 8000, "lr": 0.0, "memory": 16625, "data_time": 0.01025, "loss_rpn_cls": 0.04312, "loss_rpn_bbox": 0.04955, "loss_cls": 0.25379, "acc": 91.59602, "loss_bbox": 0.32129, "loss_mask": 0.28836, "loss": 0.9561, "time": 1.79418}
|
| 10 |
+
{"mode": "train", "epoch": 1, "iter": 9000, "lr": 0.0, "memory": 16625, "data_time": 0.01031, "loss_rpn_cls": 0.04072, "loss_rpn_bbox": 0.0476, "loss_cls": 0.24668, "acc": 91.78035, "loss_bbox": 0.31446, "loss_mask": 0.28156, "loss": 0.93101, "time": 1.81323}
|
| 11 |
+
{"mode": "train", "epoch": 1, "iter": 10000, "lr": 0.0, "memory": 16625, "data_time": 0.01087, "loss_rpn_cls": 0.04017, "loss_rpn_bbox": 0.04688, "loss_cls": 0.24817, "acc": 91.75352, "loss_bbox": 0.3132, "loss_mask": 0.27833, "loss": 0.92675, "time": 1.78742}
|
| 12 |
+
{"mode": "train", "epoch": 1, "iter": 11000, "lr": 0.0, "memory": 16625, "data_time": 0.0107, "loss_rpn_cls": 0.03893, "loss_rpn_bbox": 0.04664, "loss_cls": 0.24112, "acc": 91.96506, "loss_bbox": 0.30501, "loss_mask": 0.27352, "loss": 0.90522, "time": 1.79925}
|
| 13 |
+
{"mode": "train", "epoch": 1, "iter": 12000, "lr": 0.0, "memory": 16625, "data_time": 0.01061, "loss_rpn_cls": 0.04039, "loss_rpn_bbox": 0.04566, "loss_cls": 0.23687, "acc": 92.0467, "loss_bbox": 0.3014, "loss_mask": 0.27015, "loss": 0.89447, "time": 1.78713}
|
| 14 |
+
{"mode": "train", "epoch": 1, "iter": 13000, "lr": 0.0, "memory": 16625, "data_time": 0.01015, "loss_rpn_cls": 0.03718, "loss_rpn_bbox": 0.04392, "loss_cls": 0.22838, "acc": 92.29797, "loss_bbox": 0.29369, "loss_mask": 0.26559, "loss": 0.86877, "time": 1.79492}
|
| 15 |
+
{"mode": "train", "epoch": 1, "iter": 14000, "lr": 0.0, "memory": 16625, "data_time": 0.01017, "loss_rpn_cls": 0.03679, "loss_rpn_bbox": 0.04404, "loss_cls": 0.23332, "acc": 92.15681, "loss_bbox": 0.29518, "loss_mask": 0.26392, "loss": 0.87325, "time": 1.80592}
|
| 16 |
+
{"mode": "val", "epoch": 1, "iter": 625, "lr": 0.0, "bbox_mAP": 0.401, "bbox_mAP_50": 0.653, "bbox_mAP_75": 0.431, "bbox_mAP_s": 0.248, "bbox_mAP_m": 0.443, "bbox_mAP_l": 0.529, "bbox_mAP_copypaste": "0.401 0.653 0.431 0.248 0.443 0.529", "segm_mAP": 0.349, "segm_mAP_50": 0.604, "segm_mAP_75": 0.357, "segm_mAP_s": 0.159, "segm_mAP_m": 0.381, "segm_mAP_l": 0.539, "segm_mAP_copypaste": "0.349 0.604 0.357 0.159 0.381 0.539"}
|
| 17 |
+
{"mode": "train", "epoch": 2, "iter": 1000, "lr": 0.0, "memory": 16625, "data_time": 0.01422, "loss_rpn_cls": 0.03466, "loss_rpn_bbox": 0.04227, "loss_cls": 0.2218, "acc": 92.38877, "loss_bbox": 0.28996, "loss_mask": 0.25734, "loss": 0.84604, "time": 1.78806}
|
| 18 |
+
{"mode": "train", "epoch": 2, "iter": 2000, "lr": 0.0, "memory": 16625, "data_time": 0.0101, "loss_rpn_cls": 0.03573, "loss_rpn_bbox": 0.04232, "loss_cls": 0.21966, "acc": 92.49043, "loss_bbox": 0.28624, "loss_mask": 0.25636, "loss": 0.84032, "time": 1.78184}
|
| 19 |
+
{"mode": "train", "epoch": 2, "iter": 3000, "lr": 0.0, "memory": 16625, "data_time": 0.01007, "loss_rpn_cls": 0.03416, "loss_rpn_bbox": 0.04215, "loss_cls": 0.22091, "acc": 92.37444, "loss_bbox": 0.28664, "loss_mask": 0.25421, "loss": 0.83807, "time": 1.79325}
|
| 20 |
+
{"mode": "train", "epoch": 2, "iter": 4000, "lr": 0.0, "memory": 16625, "data_time": 0.01037, "loss_rpn_cls": 0.03326, "loss_rpn_bbox": 0.04108, "loss_cls": 0.21509, "acc": 92.59753, "loss_bbox": 0.27948, "loss_mask": 0.2518, "loss": 0.82072, "time": 1.80273}
|
| 21 |
+
{"mode": "train", "epoch": 2, "iter": 5000, "lr": 0.0, "memory": 16625, "data_time": 0.01016, "loss_rpn_cls": 0.03357, "loss_rpn_bbox": 0.04068, "loss_cls": 0.21597, "acc": 92.58228, "loss_bbox": 0.28021, "loss_mask": 0.25033, "loss": 0.82076, "time": 1.79868}
|
| 22 |
+
{"mode": "train", "epoch": 2, "iter": 6000, "lr": 0.0, "memory": 16625, "data_time": 0.01038, "loss_rpn_cls": 0.03244, "loss_rpn_bbox": 0.04093, "loss_cls": 0.21579, "acc": 92.59331, "loss_bbox": 0.2794, "loss_mask": 0.24986, "loss": 0.81841, "time": 1.78729}
|
| 23 |
+
{"mode": "train", "epoch": 2, "iter": 7000, "lr": 0.0, "memory": 16625, "data_time": 0.01024, "loss_rpn_cls": 0.0325, "loss_rpn_bbox": 0.03992, "loss_cls": 0.21081, "acc": 92.74434, "loss_bbox": 0.27422, "loss_mask": 0.24693, "loss": 0.80436, "time": 1.78085}
|
| 24 |
+
{"mode": "train", "epoch": 2, "iter": 8000, "lr": 0.0, "memory": 16625, "data_time": 0.01052, "loss_rpn_cls": 0.03366, "loss_rpn_bbox": 0.04094, "loss_cls": 0.21561, "acc": 92.61519, "loss_bbox": 0.27608, "loss_mask": 0.2481, "loss": 0.81438, "time": 1.79896}
|
| 25 |
+
{"mode": "train", "epoch": 2, "iter": 9000, "lr": 0.0, "memory": 16625, "data_time": 0.01062, "loss_rpn_cls": 0.03221, "loss_rpn_bbox": 0.04011, "loss_cls": 0.21034, "acc": 92.70833, "loss_bbox": 0.27461, "loss_mask": 0.24727, "loss": 0.80452, "time": 1.80132}
|
| 26 |
+
{"mode": "train", "epoch": 2, "iter": 10000, "lr": 0.0, "memory": 16625, "data_time": 0.01083, "loss_rpn_cls": 0.03328, "loss_rpn_bbox": 0.0399, "loss_cls": 0.21335, "acc": 92.6509, "loss_bbox": 0.27387, "loss_mask": 0.246, "loss": 0.8064, "time": 1.78969}
|
| 27 |
+
{"mode": "train", "epoch": 2, "iter": 11000, "lr": 0.0, "memory": 16625, "data_time": 0.01089, "loss_rpn_cls": 0.03143, "loss_rpn_bbox": 0.0387, "loss_cls": 0.20407, "acc": 92.95234, "loss_bbox": 0.26672, "loss_mask": 0.24314, "loss": 0.78406, "time": 1.79105}
|
| 28 |
+
{"mode": "train", "epoch": 2, "iter": 12000, "lr": 0.0, "memory": 16625, "data_time": 0.01096, "loss_rpn_cls": 0.03141, "loss_rpn_bbox": 0.03921, "loss_cls": 0.20695, "acc": 92.78494, "loss_bbox": 0.26919, "loss_mask": 0.24233, "loss": 0.78909, "time": 1.81167}
|
| 29 |
+
{"mode": "train", "epoch": 2, "iter": 13000, "lr": 0.0, "memory": 16625, "data_time": 0.01074, "loss_rpn_cls": 0.03174, "loss_rpn_bbox": 0.03919, "loss_cls": 0.20196, "acc": 92.97373, "loss_bbox": 0.26373, "loss_mask": 0.24078, "loss": 0.7774, "time": 1.78887}
|
| 30 |
+
{"mode": "train", "epoch": 2, "iter": 14000, "lr": 0.0, "memory": 16625, "data_time": 0.0102, "loss_rpn_cls": 0.03079, "loss_rpn_bbox": 0.03823, "loss_cls": 0.20136, "acc": 92.99856, "loss_bbox": 0.26168, "loss_mask": 0.24039, "loss": 0.77244, "time": 1.7847}
|
| 31 |
+
{"mode": "val", "epoch": 2, "iter": 625, "lr": 0.0, "bbox_mAP": 0.456, "bbox_mAP_50": 0.695, "bbox_mAP_75": 0.499, "bbox_mAP_s": 0.295, "bbox_mAP_m": 0.502, "bbox_mAP_l": 0.603, "bbox_mAP_copypaste": "0.456 0.695 0.499 0.295 0.502 0.603", "segm_mAP": 0.397, "segm_mAP_50": 0.652, "segm_mAP_75": 0.417, "segm_mAP_s": 0.204, "segm_mAP_m": 0.431, "segm_mAP_l": 0.593, "segm_mAP_copypaste": "0.397 0.652 0.417 0.204 0.431 0.593"}
|
| 32 |
+
{"mode": "train", "epoch": 3, "iter": 1000, "lr": 0.0, "memory": 16625, "data_time": 0.01393, "loss_rpn_cls": 0.02957, "loss_rpn_bbox": 0.03777, "loss_cls": 0.19723, "acc": 93.04951, "loss_bbox": 0.2634, "loss_mask": 0.23629, "loss": 0.76426, "time": 1.79799}
|
| 33 |
+
{"mode": "train", "epoch": 3, "iter": 2000, "lr": 0.0, "memory": 16625, "data_time": 0.01023, "loss_rpn_cls": 0.02959, "loss_rpn_bbox": 0.03785, "loss_cls": 0.19455, "acc": 93.19761, "loss_bbox": 0.25794, "loss_mask": 0.23499, "loss": 0.75492, "time": 1.80112}
|
| 34 |
+
{"mode": "train", "epoch": 3, "iter": 3000, "lr": 0.0, "memory": 16625, "data_time": 0.01049, "loss_rpn_cls": 0.02917, "loss_rpn_bbox": 0.03779, "loss_cls": 0.19804, "acc": 93.05098, "loss_bbox": 0.26249, "loss_mask": 0.2335, "loss": 0.76099, "time": 1.78695}
|
| 35 |
+
{"mode": "train", "epoch": 3, "iter": 4000, "lr": 0.0, "memory": 16625, "data_time": 0.0104, "loss_rpn_cls": 0.02814, "loss_rpn_bbox": 0.03772, "loss_cls": 0.19643, "acc": 93.04392, "loss_bbox": 0.26205, "loss_mask": 0.2343, "loss": 0.75863, "time": 1.80501}
|
| 36 |
+
{"mode": "train", "epoch": 3, "iter": 5000, "lr": 0.0, "memory": 16625, "data_time": 0.01065, "loss_rpn_cls": 0.02849, "loss_rpn_bbox": 0.03706, "loss_cls": 0.19653, "acc": 93.13508, "loss_bbox": 0.25676, "loss_mask": 0.23464, "loss": 0.75348, "time": 1.79831}
|
| 37 |
+
{"mode": "train", "epoch": 3, "iter": 6000, "lr": 0.0, "memory": 16625, "data_time": 0.01022, "loss_rpn_cls": 0.02895, "loss_rpn_bbox": 0.03714, "loss_cls": 0.19451, "acc": 93.17957, "loss_bbox": 0.25591, "loss_mask": 0.23201, "loss": 0.7485, "time": 1.80533}
|
| 38 |
+
{"mode": "train", "epoch": 3, "iter": 7000, "lr": 0.0, "memory": 16625, "data_time": 0.01002, "loss_rpn_cls": 0.0275, "loss_rpn_bbox": 0.03662, "loss_cls": 0.19304, "acc": 93.20889, "loss_bbox": 0.25506, "loss_mask": 0.23244, "loss": 0.74465, "time": 1.78882}
|
| 39 |
+
{"mode": "train", "epoch": 3, "iter": 8000, "lr": 0.0, "memory": 16625, "data_time": 0.01033, "loss_rpn_cls": 0.02794, "loss_rpn_bbox": 0.0368, "loss_cls": 0.19176, "acc": 93.23645, "loss_bbox": 0.25561, "loss_mask": 0.23081, "loss": 0.74291, "time": 1.79218}
|
| 40 |
+
{"mode": "train", "epoch": 3, "iter": 9000, "lr": 0.0, "memory": 16625, "data_time": 0.01038, "loss_rpn_cls": 0.02969, "loss_rpn_bbox": 0.03776, "loss_cls": 0.1975, "acc": 93.04822, "loss_bbox": 0.26021, "loss_mask": 0.23373, "loss": 0.75888, "time": 1.8012}
|
| 41 |
+
{"mode": "train", "epoch": 3, "iter": 10000, "lr": 0.0, "memory": 16625, "data_time": 0.01052, "loss_rpn_cls": 0.02799, "loss_rpn_bbox": 0.03718, "loss_cls": 0.19353, "acc": 93.18391, "loss_bbox": 0.25587, "loss_mask": 0.23153, "loss": 0.7461, "time": 1.77921}
|
| 42 |
+
{"mode": "train", "epoch": 3, "iter": 11000, "lr": 0.0, "memory": 16625, "data_time": 0.01025, "loss_rpn_cls": 0.02878, "loss_rpn_bbox": 0.0359, "loss_cls": 0.18969, "acc": 93.35933, "loss_bbox": 0.24929, "loss_mask": 0.23047, "loss": 0.73412, "time": 1.80289}
|
| 43 |
+
{"mode": "train", "epoch": 3, "iter": 12000, "lr": 0.0, "memory": 16625, "data_time": 0.01066, "loss_rpn_cls": 0.02771, "loss_rpn_bbox": 0.03663, "loss_cls": 0.19327, "acc": 93.22144, "loss_bbox": 0.25321, "loss_mask": 0.23123, "loss": 0.74204, "time": 1.7943}
|
| 44 |
+
{"mode": "train", "epoch": 3, "iter": 13000, "lr": 0.0, "memory": 16625, "data_time": 0.01023, "loss_rpn_cls": 0.02782, "loss_rpn_bbox": 0.03613, "loss_cls": 0.19004, "acc": 93.3448, "loss_bbox": 0.24954, "loss_mask": 0.22989, "loss": 0.73343, "time": 1.79026}
|
| 45 |
+
{"mode": "train", "epoch": 3, "iter": 14000, "lr": 0.0, "memory": 16625, "data_time": 0.01054, "loss_rpn_cls": 0.02733, "loss_rpn_bbox": 0.03603, "loss_cls": 0.19182, "acc": 93.24014, "loss_bbox": 0.25295, "loss_mask": 0.22778, "loss": 0.7359, "time": 1.78875}
|
| 46 |
+
{"mode": "val", "epoch": 3, "iter": 625, "lr": 0.0, "bbox_mAP": 0.481, "bbox_mAP_50": 0.711, "bbox_mAP_75": 0.528, "bbox_mAP_s": 0.32, "bbox_mAP_m": 0.526, "bbox_mAP_l": 0.638, "bbox_mAP_copypaste": "0.481 0.711 0.528 0.320 0.526 0.638", "segm_mAP": 0.419, "segm_mAP_50": 0.671, "segm_mAP_75": 0.446, "segm_mAP_s": 0.227, "segm_mAP_m": 0.452, "segm_mAP_l": 0.619, "segm_mAP_copypaste": "0.419 0.671 0.446 0.227 0.452 0.619"}
|
| 47 |
+
{"mode": "train", "epoch": 4, "iter": 1000, "lr": 0.0, "memory": 16625, "data_time": 0.01383, "loss_rpn_cls": 0.02653, "loss_rpn_bbox": 0.0363, "loss_cls": 0.1858, "acc": 93.43259, "loss_bbox": 0.24786, "loss_mask": 0.22494, "loss": 0.72143, "time": 1.79755}
|
| 48 |
+
{"mode": "train", "epoch": 4, "iter": 2000, "lr": 0.0, "memory": 16625, "data_time": 0.01016, "loss_rpn_cls": 0.02634, "loss_rpn_bbox": 0.0353, "loss_cls": 0.18367, "acc": 93.44294, "loss_bbox": 0.24697, "loss_mask": 0.22265, "loss": 0.71493, "time": 1.77758}
|
| 49 |
+
{"mode": "train", "epoch": 4, "iter": 3000, "lr": 0.0, "memory": 16625, "data_time": 0.01, "loss_rpn_cls": 0.02719, "loss_rpn_bbox": 0.03579, "loss_cls": 0.18355, "acc": 93.4293, "loss_bbox": 0.24832, "loss_mask": 0.22366, "loss": 0.71851, "time": 1.79598}
|
| 50 |
+
{"mode": "train", "epoch": 4, "iter": 4000, "lr": 0.0, "memory": 16625, "data_time": 0.01042, "loss_rpn_cls": 0.02595, "loss_rpn_bbox": 0.03507, "loss_cls": 0.18251, "acc": 93.47734, "loss_bbox": 0.24548, "loss_mask": 0.22423, "loss": 0.71325, "time": 1.78823}
|
| 51 |
+
{"mode": "train", "epoch": 4, "iter": 5000, "lr": 0.0, "memory": 16625, "data_time": 0.01028, "loss_rpn_cls": 0.02585, "loss_rpn_bbox": 0.03603, "loss_cls": 0.18285, "acc": 93.47993, "loss_bbox": 0.24655, "loss_mask": 0.22428, "loss": 0.71556, "time": 1.80432}
|
| 52 |
+
{"mode": "train", "epoch": 4, "iter": 6000, "lr": 0.0, "memory": 16625, "data_time": 0.01003, "loss_rpn_cls": 0.02681, "loss_rpn_bbox": 0.03476, "loss_cls": 0.18127, "acc": 93.56204, "loss_bbox": 0.24159, "loss_mask": 0.22434, "loss": 0.70878, "time": 1.79859}
|
| 53 |
+
{"mode": "train", "epoch": 4, "iter": 7000, "lr": 0.0, "memory": 16625, "data_time": 0.00983, "loss_rpn_cls": 0.02599, "loss_rpn_bbox": 0.03553, "loss_cls": 0.18516, "acc": 93.38457, "loss_bbox": 0.24872, "loss_mask": 0.22643, "loss": 0.72183, "time": 1.78739}
|
| 54 |
+
{"mode": "train", "epoch": 4, "iter": 8000, "lr": 0.0, "memory": 16625, "data_time": 0.01002, "loss_rpn_cls": 0.02661, "loss_rpn_bbox": 0.0356, "loss_cls": 0.18321, "acc": 93.50842, "loss_bbox": 0.24374, "loss_mask": 0.22247, "loss": 0.71163, "time": 1.79854}
|
| 55 |
+
{"mode": "train", "epoch": 4, "iter": 9000, "lr": 0.0, "memory": 16625, "data_time": 0.01053, "loss_rpn_cls": 0.02608, "loss_rpn_bbox": 0.03496, "loss_cls": 0.18152, "acc": 93.50024, "loss_bbox": 0.24385, "loss_mask": 0.22411, "loss": 0.71051, "time": 1.8095}
|
| 56 |
+
{"mode": "train", "epoch": 4, "iter": 10000, "lr": 0.0, "memory": 16625, "data_time": 0.01009, "loss_rpn_cls": 0.02631, "loss_rpn_bbox": 0.03535, "loss_cls": 0.18365, "acc": 93.46301, "loss_bbox": 0.24488, "loss_mask": 0.22221, "loss": 0.7124, "time": 1.79583}
|
| 57 |
+
{"mode": "train", "epoch": 4, "iter": 11000, "lr": 0.0, "memory": 16625, "data_time": 0.01027, "loss_rpn_cls": 0.02565, "loss_rpn_bbox": 0.03472, "loss_cls": 0.18165, "acc": 93.51938, "loss_bbox": 0.24187, "loss_mask": 0.22339, "loss": 0.70728, "time": 1.79908}
|
| 58 |
+
{"mode": "train", "epoch": 4, "iter": 12000, "lr": 0.0, "memory": 16625, "data_time": 0.01039, "loss_rpn_cls": 0.02553, "loss_rpn_bbox": 0.03424, "loss_cls": 0.17972, "acc": 93.59968, "loss_bbox": 0.24018, "loss_mask": 0.22235, "loss": 0.702, "time": 1.8047}
|
| 59 |
+
{"mode": "train", "epoch": 4, "iter": 13000, "lr": 0.0, "memory": 16625, "data_time": 0.01027, "loss_rpn_cls": 0.02578, "loss_rpn_bbox": 0.03455, "loss_cls": 0.18109, "acc": 93.56838, "loss_bbox": 0.24227, "loss_mask": 0.22314, "loss": 0.70683, "time": 1.79521}
|
| 60 |
+
{"mode": "train", "epoch": 4, "iter": 14000, "lr": 0.0, "memory": 16625, "data_time": 0.01057, "loss_rpn_cls": 0.02602, "loss_rpn_bbox": 0.0341, "loss_cls": 0.18173, "acc": 93.55154, "loss_bbox": 0.24004, "loss_mask": 0.22243, "loss": 0.70432, "time": 1.79353}
|
| 61 |
+
{"mode": "val", "epoch": 4, "iter": 625, "lr": 0.0, "bbox_mAP": 0.494, "bbox_mAP_50": 0.722, "bbox_mAP_75": 0.54, "bbox_mAP_s": 0.331, "bbox_mAP_m": 0.543, "bbox_mAP_l": 0.639, "bbox_mAP_copypaste": "0.494 0.722 0.540 0.331 0.543 0.639", "segm_mAP": 0.429, "segm_mAP_50": 0.685, "segm_mAP_75": 0.457, "segm_mAP_s": 0.235, "segm_mAP_m": 0.465, "segm_mAP_l": 0.622, "segm_mAP_copypaste": "0.429 0.685 0.457 0.235 0.465 0.622"}
|
| 62 |
+
{"mode": "train", "epoch": 5, "iter": 1000, "lr": 0.0, "memory": 16625, "data_time": 0.01379, "loss_rpn_cls": 0.02437, "loss_rpn_bbox": 0.0342, "loss_cls": 0.17518, "acc": 93.69473, "loss_bbox": 0.23804, "loss_mask": 0.21692, "loss": 0.68871, "time": 1.78734}
|
| 63 |
+
{"mode": "train", "epoch": 5, "iter": 2000, "lr": 0.0, "memory": 16625, "data_time": 0.0101, "loss_rpn_cls": 0.02424, "loss_rpn_bbox": 0.03458, "loss_cls": 0.17543, "acc": 93.66409, "loss_bbox": 0.24063, "loss_mask": 0.2182, "loss": 0.69307, "time": 1.775}
|
| 64 |
+
{"mode": "train", "epoch": 5, "iter": 3000, "lr": 0.0, "memory": 16625, "data_time": 0.01036, "loss_rpn_cls": 0.02444, "loss_rpn_bbox": 0.03465, "loss_cls": 0.17407, "acc": 93.72, "loss_bbox": 0.23742, "loss_mask": 0.2178, "loss": 0.68838, "time": 1.78823}
|
| 65 |
+
{"mode": "train", "epoch": 5, "iter": 4000, "lr": 0.0, "memory": 16625, "data_time": 0.01027, "loss_rpn_cls": 0.02457, "loss_rpn_bbox": 0.03448, "loss_cls": 0.17763, "acc": 93.61777, "loss_bbox": 0.24083, "loss_mask": 0.21992, "loss": 0.69744, "time": 1.81257}
|
| 66 |
+
{"mode": "train", "epoch": 5, "iter": 5000, "lr": 0.0, "memory": 16625, "data_time": 0.01081, "loss_rpn_cls": 0.02541, "loss_rpn_bbox": 0.03451, "loss_cls": 0.1755, "acc": 93.67542, "loss_bbox": 0.23797, "loss_mask": 0.21715, "loss": 0.69054, "time": 1.8033}
|
| 67 |
+
{"mode": "train", "epoch": 5, "iter": 6000, "lr": 0.0, "memory": 16625, "data_time": 0.01073, "loss_rpn_cls": 0.02389, "loss_rpn_bbox": 0.03347, "loss_cls": 0.17227, "acc": 93.77517, "loss_bbox": 0.23461, "loss_mask": 0.21611, "loss": 0.68035, "time": 1.77797}
|
| 68 |
+
{"mode": "train", "epoch": 5, "iter": 7000, "lr": 0.0, "memory": 16625, "data_time": 0.01018, "loss_rpn_cls": 0.02502, "loss_rpn_bbox": 0.03411, "loss_cls": 0.176, "acc": 93.66968, "loss_bbox": 0.23736, "loss_mask": 0.21672, "loss": 0.68921, "time": 1.79571}
|
| 69 |
+
{"mode": "train", "epoch": 5, "iter": 8000, "lr": 0.0, "memory": 16625, "data_time": 0.01043, "loss_rpn_cls": 0.02463, "loss_rpn_bbox": 0.03436, "loss_cls": 0.17588, "acc": 93.68201, "loss_bbox": 0.23762, "loss_mask": 0.2176, "loss": 0.69009, "time": 1.78971}
|
| 70 |
+
{"mode": "train", "epoch": 5, "iter": 9000, "lr": 0.0, "memory": 16625, "data_time": 0.01054, "loss_rpn_cls": 0.02411, "loss_rpn_bbox": 0.03427, "loss_cls": 0.17701, "acc": 93.63325, "loss_bbox": 0.23884, "loss_mask": 0.2154, "loss": 0.68963, "time": 1.79004}
|
| 71 |
+
{"mode": "train", "epoch": 5, "iter": 10000, "lr": 0.0, "memory": 16625, "data_time": 0.01024, "loss_rpn_cls": 0.02392, "loss_rpn_bbox": 0.03313, "loss_cls": 0.17187, "acc": 93.86565, "loss_bbox": 0.23079, "loss_mask": 0.21261, "loss": 0.67233, "time": 1.78356}
|
| 72 |
+
{"mode": "train", "epoch": 5, "iter": 11000, "lr": 0.0, "memory": 16625, "data_time": 0.01031, "loss_rpn_cls": 0.02371, "loss_rpn_bbox": 0.03383, "loss_cls": 0.17392, "acc": 93.75537, "loss_bbox": 0.23325, "loss_mask": 0.21758, "loss": 0.6823, "time": 1.79441}
|
| 73 |
+
{"mode": "train", "epoch": 5, "iter": 12000, "lr": 0.0, "memory": 16625, "data_time": 0.01051, "loss_rpn_cls": 0.02502, "loss_rpn_bbox": 0.0336, "loss_cls": 0.17433, "acc": 93.74749, "loss_bbox": 0.23415, "loss_mask": 0.2157, "loss": 0.68279, "time": 1.80263}
|
| 74 |
+
{"mode": "train", "epoch": 5, "iter": 13000, "lr": 0.0, "memory": 16625, "data_time": 0.01037, "loss_rpn_cls": 0.02496, "loss_rpn_bbox": 0.03352, "loss_cls": 0.175, "acc": 93.69119, "loss_bbox": 0.23493, "loss_mask": 0.21601, "loss": 0.68441, "time": 1.79261}
|
| 75 |
+
{"mode": "train", "epoch": 5, "iter": 14000, "lr": 0.0, "memory": 16625, "data_time": 0.01045, "loss_rpn_cls": 0.02379, "loss_rpn_bbox": 0.03398, "loss_cls": 0.17397, "acc": 93.74202, "loss_bbox": 0.23356, "loss_mask": 0.21472, "loss": 0.68002, "time": 1.77998}
|
| 76 |
+
{"mode": "val", "epoch": 5, "iter": 625, "lr": 0.0, "bbox_mAP": 0.508, "bbox_mAP_50": 0.732, "bbox_mAP_75": 0.557, "bbox_mAP_s": 0.348, "bbox_mAP_m": 0.548, "bbox_mAP_l": 0.665, "bbox_mAP_copypaste": "0.508 0.732 0.557 0.348 0.548 0.665", "segm_mAP": 0.435, "segm_mAP_50": 0.692, "segm_mAP_75": 0.466, "segm_mAP_s": 0.243, "segm_mAP_m": 0.468, "segm_mAP_l": 0.634, "segm_mAP_copypaste": "0.435 0.692 0.466 0.243 0.468 0.634"}
|
| 77 |
+
{"mode": "train", "epoch": 6, "iter": 1000, "lr": 0.0, "memory": 16625, "data_time": 0.01375, "loss_rpn_cls": 0.02327, "loss_rpn_bbox": 0.0332, "loss_cls": 0.16745, "acc": 93.93115, "loss_bbox": 0.23072, "loss_mask": 0.21404, "loss": 0.66867, "time": 1.78503}
|
| 78 |
+
{"mode": "train", "epoch": 6, "iter": 2000, "lr": 0.0, "memory": 16625, "data_time": 0.0099, "loss_rpn_cls": 0.02387, "loss_rpn_bbox": 0.03342, "loss_cls": 0.16921, "acc": 93.87334, "loss_bbox": 0.23095, "loss_mask": 0.21243, "loss": 0.66987, "time": 1.78431}
|
| 79 |
+
{"mode": "train", "epoch": 6, "iter": 3000, "lr": 0.0, "memory": 16625, "data_time": 0.01011, "loss_rpn_cls": 0.02293, "loss_rpn_bbox": 0.03308, "loss_cls": 0.1689, "acc": 93.84019, "loss_bbox": 0.23145, "loss_mask": 0.21235, "loss": 0.66871, "time": 1.78154}
|
| 80 |
+
{"mode": "train", "epoch": 6, "iter": 4000, "lr": 0.0, "memory": 16625, "data_time": 0.01062, "loss_rpn_cls": 0.02349, "loss_rpn_bbox": 0.03306, "loss_cls": 0.16983, "acc": 93.83882, "loss_bbox": 0.2318, "loss_mask": 0.21055, "loss": 0.66874, "time": 1.7996}
|
| 81 |
+
{"mode": "train", "epoch": 6, "iter": 5000, "lr": 0.0, "memory": 16625, "data_time": 0.01065, "loss_rpn_cls": 0.0221, "loss_rpn_bbox": 0.0318, "loss_cls": 0.16654, "acc": 93.96313, "loss_bbox": 0.2292, "loss_mask": 0.21042, "loss": 0.66006, "time": 1.79342}
|
| 82 |
+
{"mode": "train", "epoch": 6, "iter": 6000, "lr": 0.0, "memory": 16625, "data_time": 0.01053, "loss_rpn_cls": 0.02331, "loss_rpn_bbox": 0.03312, "loss_cls": 0.16891, "acc": 93.85508, "loss_bbox": 0.23263, "loss_mask": 0.2119, "loss": 0.66986, "time": 1.79372}
|
| 83 |
+
{"mode": "train", "epoch": 6, "iter": 7000, "lr": 0.0, "memory": 16625, "data_time": 0.01006, "loss_rpn_cls": 0.02247, "loss_rpn_bbox": 0.03222, "loss_cls": 0.1677, "acc": 93.92114, "loss_bbox": 0.23004, "loss_mask": 0.20981, "loss": 0.66224, "time": 1.78821}
|
| 84 |
+
{"mode": "train", "epoch": 6, "iter": 8000, "lr": 0.0, "memory": 16625, "data_time": 0.01085, "loss_rpn_cls": 0.0236, "loss_rpn_bbox": 0.03391, "loss_cls": 0.17306, "acc": 93.72351, "loss_bbox": 0.23526, "loss_mask": 0.21185, "loss": 0.67769, "time": 1.79113}
|
| 85 |
+
{"mode": "train", "epoch": 6, "iter": 9000, "lr": 0.0, "memory": 16625, "data_time": 0.01043, "loss_rpn_cls": 0.02235, "loss_rpn_bbox": 0.03283, "loss_cls": 0.16746, "acc": 93.97861, "loss_bbox": 0.22911, "loss_mask": 0.20938, "loss": 0.66114, "time": 1.80319}
|
| 86 |
+
{"mode": "train", "epoch": 6, "iter": 10000, "lr": 0.0, "memory": 16625, "data_time": 0.01019, "loss_rpn_cls": 0.02292, "loss_rpn_bbox": 0.03305, "loss_cls": 0.16886, "acc": 93.85811, "loss_bbox": 0.2289, "loss_mask": 0.20987, "loss": 0.66359, "time": 1.79199}
|
| 87 |
+
{"mode": "train", "epoch": 6, "iter": 11000, "lr": 0.0, "memory": 16625, "data_time": 0.01017, "loss_rpn_cls": 0.02274, "loss_rpn_bbox": 0.0323, "loss_cls": 0.16754, "acc": 93.9448, "loss_bbox": 0.22866, "loss_mask": 0.21074, "loss": 0.66199, "time": 1.80143}
|
| 88 |
+
{"mode": "train", "epoch": 6, "iter": 12000, "lr": 0.0, "memory": 16625, "data_time": 0.01042, "loss_rpn_cls": 0.02297, "loss_rpn_bbox": 0.03305, "loss_cls": 0.16809, "acc": 93.89841, "loss_bbox": 0.22785, "loss_mask": 0.21145, "loss": 0.66341, "time": 1.79173}
|
| 89 |
+
{"mode": "train", "epoch": 6, "iter": 13000, "lr": 0.0, "memory": 16625, "data_time": 0.01029, "loss_rpn_cls": 0.02316, "loss_rpn_bbox": 0.0324, "loss_cls": 0.17033, "acc": 93.81289, "loss_bbox": 0.23211, "loss_mask": 0.21258, "loss": 0.67059, "time": 1.78311}
|
| 90 |
+
{"mode": "train", "epoch": 6, "iter": 14000, "lr": 0.0, "memory": 16625, "data_time": 0.01048, "loss_rpn_cls": 0.02293, "loss_rpn_bbox": 0.03282, "loss_cls": 0.16901, "acc": 93.8593, "loss_bbox": 0.2307, "loss_mask": 0.21286, "loss": 0.66832, "time": 1.78905}
|
| 91 |
+
{"mode": "val", "epoch": 6, "iter": 625, "lr": 0.0, "bbox_mAP": 0.513, "bbox_mAP_50": 0.738, "bbox_mAP_75": 0.56, "bbox_mAP_s": 0.351, "bbox_mAP_m": 0.562, "bbox_mAP_l": 0.665, "bbox_mAP_copypaste": "0.513 0.738 0.560 0.351 0.562 0.665", "segm_mAP": 0.444, "segm_mAP_50": 0.701, "segm_mAP_75": 0.474, "segm_mAP_s": 0.252, "segm_mAP_m": 0.483, "segm_mAP_l": 0.637, "segm_mAP_copypaste": "0.444 0.701 0.474 0.252 0.483 0.637"}
|
| 92 |
+
{"mode": "train", "epoch": 7, "iter": 1000, "lr": 0.0, "memory": 16625, "data_time": 0.01637, "loss_rpn_cls": 0.02252, "loss_rpn_bbox": 0.03268, "loss_cls": 0.16491, "acc": 93.99697, "loss_bbox": 0.2278, "loss_mask": 0.20907, "loss": 0.65698, "time": 1.78726}
|
| 93 |
+
{"mode": "train", "epoch": 7, "iter": 2000, "lr": 0.0, "memory": 16625, "data_time": 0.00996, "loss_rpn_cls": 0.02131, "loss_rpn_bbox": 0.03195, "loss_cls": 0.16249, "acc": 94.03975, "loss_bbox": 0.22579, "loss_mask": 0.2071, "loss": 0.64864, "time": 1.78199}
|
| 94 |
+
{"mode": "train", "epoch": 7, "iter": 3000, "lr": 0.0, "memory": 16625, "data_time": 0.01005, "loss_rpn_cls": 0.02215, "loss_rpn_bbox": 0.03209, "loss_cls": 0.1649, "acc": 93.96445, "loss_bbox": 0.22726, "loss_mask": 0.20814, "loss": 0.65453, "time": 1.7919}
|
| 95 |
+
{"mode": "train", "epoch": 7, "iter": 4000, "lr": 0.0, "memory": 16625, "data_time": 0.01043, "loss_rpn_cls": 0.02212, "loss_rpn_bbox": 0.03256, "loss_cls": 0.16564, "acc": 93.94412, "loss_bbox": 0.22888, "loss_mask": 0.20814, "loss": 0.65733, "time": 1.78335}
|
| 96 |
+
{"mode": "train", "epoch": 7, "iter": 5000, "lr": 0.0, "memory": 16625, "data_time": 0.01019, "loss_rpn_cls": 0.02179, "loss_rpn_bbox": 0.03207, "loss_cls": 0.16295, "acc": 94.06404, "loss_bbox": 0.22438, "loss_mask": 0.20615, "loss": 0.64734, "time": 1.79742}
|
| 97 |
+
{"mode": "train", "epoch": 7, "iter": 6000, "lr": 0.0, "memory": 16625, "data_time": 0.0103, "loss_rpn_cls": 0.02147, "loss_rpn_bbox": 0.03236, "loss_cls": 0.16478, "acc": 93.97544, "loss_bbox": 0.22797, "loss_mask": 0.20795, "loss": 0.65453, "time": 1.79563}
|
| 98 |
+
{"mode": "train", "epoch": 7, "iter": 7000, "lr": 0.0, "memory": 16625, "data_time": 0.01015, "loss_rpn_cls": 0.02237, "loss_rpn_bbox": 0.03178, "loss_cls": 0.16226, "acc": 94.12388, "loss_bbox": 0.22153, "loss_mask": 0.20701, "loss": 0.64494, "time": 1.77475}
|
| 99 |
+
{"mode": "train", "epoch": 7, "iter": 8000, "lr": 0.0, "memory": 16625, "data_time": 0.01041, "loss_rpn_cls": 0.02235, "loss_rpn_bbox": 0.03274, "loss_cls": 0.16561, "acc": 93.93604, "loss_bbox": 0.22778, "loss_mask": 0.2089, "loss": 0.65738, "time": 1.79173}
|
| 100 |
+
{"mode": "train", "epoch": 7, "iter": 9000, "lr": 0.0, "memory": 16625, "data_time": 0.01081, "loss_rpn_cls": 0.02215, "loss_rpn_bbox": 0.0323, "loss_cls": 0.16337, "acc": 94.02107, "loss_bbox": 0.22551, "loss_mask": 0.20726, "loss": 0.65059, "time": 1.80533}
|
| 101 |
+
{"mode": "train", "epoch": 7, "iter": 10000, "lr": 0.0, "memory": 16625, "data_time": 0.01026, "loss_rpn_cls": 0.02165, "loss_rpn_bbox": 0.0319, "loss_cls": 0.16277, "acc": 94.07815, "loss_bbox": 0.2249, "loss_mask": 0.20691, "loss": 0.64813, "time": 1.80009}
|
| 102 |
+
{"mode": "train", "epoch": 7, "iter": 11000, "lr": 0.0, "memory": 16625, "data_time": 0.01056, "loss_rpn_cls": 0.02157, "loss_rpn_bbox": 0.03209, "loss_cls": 0.16386, "acc": 94.05798, "loss_bbox": 0.22355, "loss_mask": 0.20653, "loss": 0.64759, "time": 1.79561}
|
| 103 |
+
{"mode": "train", "epoch": 7, "iter": 12000, "lr": 0.0, "memory": 16625, "data_time": 0.01036, "loss_rpn_cls": 0.02186, "loss_rpn_bbox": 0.03168, "loss_cls": 0.16296, "acc": 94.03965, "loss_bbox": 0.22391, "loss_mask": 0.20792, "loss": 0.64833, "time": 1.80554}
|
| 104 |
+
{"mode": "train", "epoch": 7, "iter": 13000, "lr": 0.0, "memory": 16625, "data_time": 0.01023, "loss_rpn_cls": 0.02191, "loss_rpn_bbox": 0.03146, "loss_cls": 0.16123, "acc": 94.15237, "loss_bbox": 0.22122, "loss_mask": 0.20597, "loss": 0.64179, "time": 1.78877}
|
| 105 |
+
{"mode": "train", "epoch": 7, "iter": 14000, "lr": 0.0, "memory": 16625, "data_time": 0.01023, "loss_rpn_cls": 0.02224, "loss_rpn_bbox": 0.03205, "loss_cls": 0.16429, "acc": 94.01797, "loss_bbox": 0.22426, "loss_mask": 0.21017, "loss": 0.65301, "time": 1.79635}
|
| 106 |
+
{"mode": "val", "epoch": 7, "iter": 625, "lr": 0.0, "bbox_mAP": 0.512, "bbox_mAP_50": 0.738, "bbox_mAP_75": 0.561, "bbox_mAP_s": 0.343, "bbox_mAP_m": 0.56, "bbox_mAP_l": 0.665, "bbox_mAP_copypaste": "0.512 0.738 0.561 0.343 0.560 0.665", "segm_mAP": 0.443, "segm_mAP_50": 0.701, "segm_mAP_75": 0.472, "segm_mAP_s": 0.245, "segm_mAP_m": 0.483, "segm_mAP_l": 0.635, "segm_mAP_copypaste": "0.443 0.701 0.472 0.245 0.483 0.635"}
|
| 107 |
+
{"mode": "train", "epoch": 8, "iter": 1000, "lr": 0.0, "memory": 16625, "data_time": 0.0141, "loss_rpn_cls": 0.02057, "loss_rpn_bbox": 0.03187, "loss_cls": 0.16051, "acc": 94.11541, "loss_bbox": 0.2248, "loss_mask": 0.20421, "loss": 0.64196, "time": 1.81748}
|
| 108 |
+
{"mode": "train", "epoch": 8, "iter": 2000, "lr": 0.0, "memory": 16625, "data_time": 0.00982, "loss_rpn_cls": 0.02156, "loss_rpn_bbox": 0.03089, "loss_cls": 0.15552, "acc": 94.29407, "loss_bbox": 0.21914, "loss_mask": 0.20166, "loss": 0.62878, "time": 1.77445}
|
| 109 |
+
{"mode": "train", "epoch": 8, "iter": 3000, "lr": 0.0, "memory": 16625, "data_time": 0.01025, "loss_rpn_cls": 0.02069, "loss_rpn_bbox": 0.03151, "loss_cls": 0.15919, "acc": 94.12454, "loss_bbox": 0.22214, "loss_mask": 0.20519, "loss": 0.63871, "time": 1.79686}
|
| 110 |
+
{"mode": "train", "epoch": 8, "iter": 4000, "lr": 0.0, "memory": 16625, "data_time": 0.01033, "loss_rpn_cls": 0.02114, "loss_rpn_bbox": 0.03202, "loss_cls": 0.15891, "acc": 94.15481, "loss_bbox": 0.22222, "loss_mask": 0.20517, "loss": 0.63945, "time": 1.80627}
|
| 111 |
+
{"mode": "train", "epoch": 8, "iter": 5000, "lr": 0.0, "memory": 16625, "data_time": 0.01025, "loss_rpn_cls": 0.02152, "loss_rpn_bbox": 0.03085, "loss_cls": 0.15753, "acc": 94.23022, "loss_bbox": 0.21927, "loss_mask": 0.20264, "loss": 0.63181, "time": 1.80011}
|
| 112 |
+
{"mode": "train", "epoch": 8, "iter": 6000, "lr": 0.0, "memory": 16625, "data_time": 0.01088, "loss_rpn_cls": 0.02142, "loss_rpn_bbox": 0.03186, "loss_cls": 0.15982, "acc": 94.13979, "loss_bbox": 0.22151, "loss_mask": 0.20431, "loss": 0.63892, "time": 1.80079}
|
| 113 |
+
{"mode": "train", "epoch": 8, "iter": 7000, "lr": 0.0, "memory": 16625, "data_time": 0.01023, "loss_rpn_cls": 0.02095, "loss_rpn_bbox": 0.03069, "loss_cls": 0.15734, "acc": 94.22249, "loss_bbox": 0.218, "loss_mask": 0.20285, "loss": 0.62983, "time": 1.78435}
|
| 114 |
+
{"mode": "train", "epoch": 8, "iter": 8000, "lr": 0.0, "memory": 16625, "data_time": 0.01066, "loss_rpn_cls": 0.02123, "loss_rpn_bbox": 0.0313, "loss_cls": 0.16168, "acc": 94.09275, "loss_bbox": 0.22247, "loss_mask": 0.20473, "loss": 0.64141, "time": 1.794}
|
| 115 |
+
{"mode": "train", "epoch": 8, "iter": 9000, "lr": 0.0, "memory": 16625, "data_time": 0.01086, "loss_rpn_cls": 0.02161, "loss_rpn_bbox": 0.03154, "loss_cls": 0.16093, "acc": 94.10854, "loss_bbox": 0.22382, "loss_mask": 0.20658, "loss": 0.64448, "time": 1.79458}
|
| 116 |
+
{"mode": "train", "epoch": 8, "iter": 10000, "lr": 0.0, "memory": 16625, "data_time": 0.01095, "loss_rpn_cls": 0.02085, "loss_rpn_bbox": 0.03079, "loss_cls": 0.15712, "acc": 94.23291, "loss_bbox": 0.21903, "loss_mask": 0.20233, "loss": 0.63012, "time": 1.78552}
|
| 117 |
+
{"mode": "train", "epoch": 8, "iter": 11000, "lr": 0.0, "memory": 16625, "data_time": 0.01029, "loss_rpn_cls": 0.01996, "loss_rpn_bbox": 0.03054, "loss_cls": 0.1585, "acc": 94.19905, "loss_bbox": 0.21925, "loss_mask": 0.20235, "loss": 0.6306, "time": 1.80619}
|
| 118 |
+
{"mode": "train", "epoch": 8, "iter": 12000, "lr": 0.0, "memory": 16625, "data_time": 0.01041, "loss_rpn_cls": 0.02074, "loss_rpn_bbox": 0.03081, "loss_cls": 0.158, "acc": 94.21079, "loss_bbox": 0.21976, "loss_mask": 0.20257, "loss": 0.63188, "time": 1.78389}
|
| 119 |
+
{"mode": "train", "epoch": 8, "iter": 13000, "lr": 0.0, "memory": 16625, "data_time": 0.01041, "loss_rpn_cls": 0.02189, "loss_rpn_bbox": 0.03167, "loss_cls": 0.16143, "acc": 94.12021, "loss_bbox": 0.22402, "loss_mask": 0.20486, "loss": 0.64387, "time": 1.80275}
|
| 120 |
+
{"mode": "train", "epoch": 8, "iter": 14000, "lr": 0.0, "memory": 16625, "data_time": 0.01062, "loss_rpn_cls": 0.02167, "loss_rpn_bbox": 0.0318, "loss_cls": 0.1617, "acc": 94.09453, "loss_bbox": 0.22304, "loss_mask": 0.20407, "loss": 0.64227, "time": 1.7791}
|
| 121 |
+
{"mode": "val", "epoch": 8, "iter": 625, "lr": 0.0, "bbox_mAP": 0.517, "bbox_mAP_50": 0.739, "bbox_mAP_75": 0.569, "bbox_mAP_s": 0.345, "bbox_mAP_m": 0.565, "bbox_mAP_l": 0.67, "bbox_mAP_copypaste": "0.517 0.739 0.569 0.345 0.565 0.670", "segm_mAP": 0.452, "segm_mAP_50": 0.705, "segm_mAP_75": 0.485, "segm_mAP_s": 0.249, "segm_mAP_m": 0.492, "segm_mAP_l": 0.644, "segm_mAP_copypaste": "0.452 0.705 0.485 0.249 0.492 0.644"}
|
| 122 |
+
{"mode": "train", "epoch": 9, "iter": 1000, "lr": 0.0, "memory": 16625, "data_time": 0.01369, "loss_rpn_cls": 0.0183, "loss_rpn_bbox": 0.02911, "loss_cls": 0.14986, "acc": 94.45352, "loss_bbox": 0.21084, "loss_mask": 0.19715, "loss": 0.60526, "time": 1.78738}
|
| 123 |
+
{"mode": "train", "epoch": 9, "iter": 2000, "lr": 0.0, "memory": 16625, "data_time": 0.01013, "loss_rpn_cls": 0.01882, "loss_rpn_bbox": 0.02948, "loss_cls": 0.14949, "acc": 94.4626, "loss_bbox": 0.2128, "loss_mask": 0.19706, "loss": 0.60765, "time": 1.76864}
|
| 124 |
+
{"mode": "train", "epoch": 9, "iter": 3000, "lr": 0.0, "memory": 16625, "data_time": 0.0105, "loss_rpn_cls": 0.01928, "loss_rpn_bbox": 0.02971, "loss_cls": 0.14947, "acc": 94.45149, "loss_bbox": 0.21319, "loss_mask": 0.19671, "loss": 0.60835, "time": 1.7892}
|
| 125 |
+
{"mode": "train", "epoch": 9, "iter": 4000, "lr": 0.0, "memory": 16625, "data_time": 0.01062, "loss_rpn_cls": 0.01857, "loss_rpn_bbox": 0.02929, "loss_cls": 0.14728, "acc": 94.51797, "loss_bbox": 0.21074, "loss_mask": 0.19438, "loss": 0.60027, "time": 1.78229}
|
| 126 |
+
{"mode": "train", "epoch": 9, "iter": 5000, "lr": 0.0, "memory": 16625, "data_time": 0.01024, "loss_rpn_cls": 0.01889, "loss_rpn_bbox": 0.02914, "loss_cls": 0.14441, "acc": 94.57129, "loss_bbox": 0.2083, "loss_mask": 0.19587, "loss": 0.59661, "time": 1.79687}
|
| 127 |
+
{"mode": "train", "epoch": 9, "iter": 6000, "lr": 0.0, "memory": 16625, "data_time": 0.0105, "loss_rpn_cls": 0.01893, "loss_rpn_bbox": 0.02925, "loss_cls": 0.14585, "acc": 94.55425, "loss_bbox": 0.20967, "loss_mask": 0.19471, "loss": 0.59841, "time": 1.78331}
|
| 128 |
+
{"mode": "train", "epoch": 9, "iter": 7000, "lr": 0.0, "memory": 16625, "data_time": 0.0105, "loss_rpn_cls": 0.01831, "loss_rpn_bbox": 0.0298, "loss_cls": 0.14868, "acc": 94.45798, "loss_bbox": 0.21093, "loss_mask": 0.19438, "loss": 0.6021, "time": 1.77861}
|
| 129 |
+
{"mode": "train", "epoch": 9, "iter": 8000, "lr": 0.0, "memory": 16625, "data_time": 0.0103, "loss_rpn_cls": 0.01829, "loss_rpn_bbox": 0.02821, "loss_cls": 0.14334, "acc": 94.65972, "loss_bbox": 0.20776, "loss_mask": 0.19321, "loss": 0.5908, "time": 1.80025}
|
| 130 |
+
{"mode": "train", "epoch": 9, "iter": 9000, "lr": 0.0, "memory": 16625, "data_time": 0.01013, "loss_rpn_cls": 0.01807, "loss_rpn_bbox": 0.02909, "loss_cls": 0.14511, "acc": 94.59128, "loss_bbox": 0.20788, "loss_mask": 0.194, "loss": 0.59416, "time": 1.81147}
|
| 131 |
+
{"mode": "train", "epoch": 9, "iter": 10000, "lr": 0.0, "memory": 16625, "data_time": 0.01044, "loss_rpn_cls": 0.01827, "loss_rpn_bbox": 0.0291, "loss_cls": 0.14695, "acc": 94.49492, "loss_bbox": 0.21143, "loss_mask": 0.19442, "loss": 0.60017, "time": 1.77646}
|
| 132 |
+
{"mode": "train", "epoch": 9, "iter": 11000, "lr": 0.0, "memory": 16625, "data_time": 0.01008, "loss_rpn_cls": 0.01813, "loss_rpn_bbox": 0.02909, "loss_cls": 0.14409, "acc": 94.61138, "loss_bbox": 0.20824, "loss_mask": 0.19336, "loss": 0.5929, "time": 1.7912}
|
| 133 |
+
{"mode": "train", "epoch": 9, "iter": 12000, "lr": 0.0, "memory": 16625, "data_time": 0.01028, "loss_rpn_cls": 0.01758, "loss_rpn_bbox": 0.02853, "loss_cls": 0.14315, "acc": 94.64031, "loss_bbox": 0.20632, "loss_mask": 0.19217, "loss": 0.58775, "time": 1.7966}
|
| 134 |
+
{"mode": "train", "epoch": 9, "iter": 13000, "lr": 0.0, "memory": 16625, "data_time": 0.00989, "loss_rpn_cls": 0.01798, "loss_rpn_bbox": 0.02885, "loss_cls": 0.14481, "acc": 94.57678, "loss_bbox": 0.20983, "loss_mask": 0.19498, "loss": 0.59645, "time": 1.79529}
|
| 135 |
+
{"mode": "train", "epoch": 9, "iter": 14000, "lr": 0.0, "memory": 16625, "data_time": 0.01026, "loss_rpn_cls": 0.01794, "loss_rpn_bbox": 0.0284, "loss_cls": 0.14411, "acc": 94.6145, "loss_bbox": 0.20654, "loss_mask": 0.19209, "loss": 0.58907, "time": 1.78484}
|
| 136 |
+
{"mode": "val", "epoch": 9, "iter": 625, "lr": 0.0, "bbox_mAP": 0.535, "bbox_mAP_50": 0.754, "bbox_mAP_75": 0.585, "bbox_mAP_s": 0.363, "bbox_mAP_m": 0.581, "bbox_mAP_l": 0.693, "bbox_mAP_copypaste": "0.535 0.754 0.585 0.363 0.581 0.693", "segm_mAP": 0.465, "segm_mAP_50": 0.721, "segm_mAP_75": 0.499, "segm_mAP_s": 0.268, "segm_mAP_m": 0.502, "segm_mAP_l": 0.656, "segm_mAP_copypaste": "0.465 0.721 0.499 0.268 0.502 0.656"}
|
| 137 |
+
{"mode": "train", "epoch": 10, "iter": 1000, "lr": 0.0, "memory": 16625, "data_time": 0.01398, "loss_rpn_cls": 0.01757, "loss_rpn_bbox": 0.02876, "loss_cls": 0.14344, "acc": 94.6053, "loss_bbox": 0.20866, "loss_mask": 0.19259, "loss": 0.59102, "time": 1.80709}
|
| 138 |
+
{"mode": "train", "epoch": 10, "iter": 2000, "lr": 0.0, "memory": 16625, "data_time": 0.01086, "loss_rpn_cls": 0.01763, "loss_rpn_bbox": 0.02876, "loss_cls": 0.14199, "acc": 94.66113, "loss_bbox": 0.20769, "loss_mask": 0.19228, "loss": 0.58836, "time": 1.79444}
|
| 139 |
+
{"mode": "train", "epoch": 10, "iter": 3000, "lr": 0.0, "memory": 16625, "data_time": 0.00993, "loss_rpn_cls": 0.01762, "loss_rpn_bbox": 0.02847, "loss_cls": 0.14279, "acc": 94.66509, "loss_bbox": 0.20729, "loss_mask": 0.19325, "loss": 0.58942, "time": 1.81191}
|
| 140 |
+
{"mode": "train", "epoch": 10, "iter": 4000, "lr": 0.0, "memory": 16625, "data_time": 0.01006, "loss_rpn_cls": 0.01747, "loss_rpn_bbox": 0.02802, "loss_cls": 0.14055, "acc": 94.73945, "loss_bbox": 0.20357, "loss_mask": 0.19063, "loss": 0.58024, "time": 1.79599}
|
| 141 |
+
{"mode": "train", "epoch": 10, "iter": 5000, "lr": 0.0, "memory": 16625, "data_time": 0.00996, "loss_rpn_cls": 0.01751, "loss_rpn_bbox": 0.02894, "loss_cls": 0.14124, "acc": 94.67063, "loss_bbox": 0.20657, "loss_mask": 0.19185, "loss": 0.58612, "time": 1.78874}
|
| 142 |
+
{"mode": "train", "epoch": 10, "iter": 6000, "lr": 0.0, "memory": 16625, "data_time": 0.01034, "loss_rpn_cls": 0.01848, "loss_rpn_bbox": 0.02884, "loss_cls": 0.14494, "acc": 94.55244, "loss_bbox": 0.2101, "loss_mask": 0.19368, "loss": 0.59604, "time": 1.78804}
|
| 143 |
+
{"mode": "train", "epoch": 10, "iter": 7000, "lr": 0.0, "memory": 16625, "data_time": 0.01075, "loss_rpn_cls": 0.01767, "loss_rpn_bbox": 0.02878, "loss_cls": 0.14206, "acc": 94.67976, "loss_bbox": 0.20724, "loss_mask": 0.19216, "loss": 0.5879, "time": 1.78328}
|
| 144 |
+
{"mode": "train", "epoch": 10, "iter": 8000, "lr": 0.0, "memory": 16625, "data_time": 0.01019, "loss_rpn_cls": 0.01715, "loss_rpn_bbox": 0.02835, "loss_cls": 0.14249, "acc": 94.67122, "loss_bbox": 0.20721, "loss_mask": 0.19282, "loss": 0.58802, "time": 1.77423}
|
| 145 |
+
{"mode": "train", "epoch": 10, "iter": 9000, "lr": 0.0, "memory": 16625, "data_time": 0.0102, "loss_rpn_cls": 0.01731, "loss_rpn_bbox": 0.02799, "loss_cls": 0.14148, "acc": 94.6948, "loss_bbox": 0.20568, "loss_mask": 0.19127, "loss": 0.58373, "time": 1.79035}
|
| 146 |
+
{"mode": "train", "epoch": 10, "iter": 10000, "lr": 0.0, "memory": 16625, "data_time": 0.01032, "loss_rpn_cls": 0.01778, "loss_rpn_bbox": 0.02837, "loss_cls": 0.14476, "acc": 94.56951, "loss_bbox": 0.20782, "loss_mask": 0.1913, "loss": 0.59003, "time": 1.78719}
|
| 147 |
+
{"mode": "train", "epoch": 10, "iter": 11000, "lr": 0.0, "memory": 16625, "data_time": 0.01027, "loss_rpn_cls": 0.01844, "loss_rpn_bbox": 0.02949, "loss_cls": 0.14409, "acc": 94.57976, "loss_bbox": 0.20878, "loss_mask": 0.19478, "loss": 0.59559, "time": 1.79686}
|
| 148 |
+
{"mode": "train", "epoch": 10, "iter": 12000, "lr": 0.0, "memory": 16625, "data_time": 0.01043, "loss_rpn_cls": 0.01697, "loss_rpn_bbox": 0.02801, "loss_cls": 0.14168, "acc": 94.68569, "loss_bbox": 0.20581, "loss_mask": 0.19136, "loss": 0.58384, "time": 1.7886}
|
| 149 |
+
{"mode": "train", "epoch": 10, "iter": 13000, "lr": 0.0, "memory": 16625, "data_time": 0.01002, "loss_rpn_cls": 0.0174, "loss_rpn_bbox": 0.02846, "loss_cls": 0.1421, "acc": 94.67915, "loss_bbox": 0.20661, "loss_mask": 0.19114, "loss": 0.58571, "time": 1.77637}
|
| 150 |
+
{"mode": "train", "epoch": 10, "iter": 14000, "lr": 0.0, "memory": 16625, "data_time": 0.01053, "loss_rpn_cls": 0.01799, "loss_rpn_bbox": 0.0284, "loss_cls": 0.14166, "acc": 94.71641, "loss_bbox": 0.20504, "loss_mask": 0.18925, "loss": 0.58233, "time": 1.79614}
|
| 151 |
+
{"mode": "val", "epoch": 10, "iter": 625, "lr": 0.0, "bbox_mAP": 0.537, "bbox_mAP_50": 0.756, "bbox_mAP_75": 0.589, "bbox_mAP_s": 0.366, "bbox_mAP_m": 0.583, "bbox_mAP_l": 0.695, "bbox_mAP_copypaste": "0.537 0.756 0.589 0.366 0.583 0.695", "segm_mAP": 0.465, "segm_mAP_50": 0.721, "segm_mAP_75": 0.499, "segm_mAP_s": 0.269, "segm_mAP_m": 0.501, "segm_mAP_l": 0.656, "segm_mAP_copypaste": "0.465 0.721 0.499 0.269 0.501 0.656"}
|
| 152 |
+
{"mode": "train", "epoch": 11, "iter": 1000, "lr": 0.0, "memory": 16625, "data_time": 0.01424, "loss_rpn_cls": 0.01791, "loss_rpn_bbox": 0.02884, "loss_cls": 0.14244, "acc": 94.64194, "loss_bbox": 0.20978, "loss_mask": 0.19378, "loss": 0.59274, "time": 1.79104}
|
| 153 |
+
{"mode": "train", "epoch": 11, "iter": 2000, "lr": 0.0, "memory": 16625, "data_time": 0.01038, "loss_rpn_cls": 0.01747, "loss_rpn_bbox": 0.02795, "loss_cls": 0.14152, "acc": 94.67236, "loss_bbox": 0.20657, "loss_mask": 0.19076, "loss": 0.58427, "time": 1.77822}
|
| 154 |
+
{"mode": "train", "epoch": 11, "iter": 3000, "lr": 0.0, "memory": 16625, "data_time": 0.01024, "loss_rpn_cls": 0.01688, "loss_rpn_bbox": 0.0279, "loss_cls": 0.14052, "acc": 94.69629, "loss_bbox": 0.20498, "loss_mask": 0.19118, "loss": 0.58147, "time": 1.80233}
|
| 155 |
+
{"mode": "train", "epoch": 11, "iter": 4000, "lr": 0.0, "memory": 16625, "data_time": 0.01041, "loss_rpn_cls": 0.0173, "loss_rpn_bbox": 0.02829, "loss_cls": 0.14133, "acc": 94.70066, "loss_bbox": 0.20485, "loss_mask": 0.19069, "loss": 0.58247, "time": 1.8036}
|
| 156 |
+
{"mode": "train", "epoch": 11, "iter": 5000, "lr": 0.0, "memory": 16625, "data_time": 0.01039, "loss_rpn_cls": 0.01778, "loss_rpn_bbox": 0.02902, "loss_cls": 0.14242, "acc": 94.62737, "loss_bbox": 0.20841, "loss_mask": 0.192, "loss": 0.58963, "time": 1.78434}
|
| 157 |
+
{"mode": "train", "epoch": 11, "iter": 6000, "lr": 0.0, "memory": 16625, "data_time": 0.01036, "loss_rpn_cls": 0.01734, "loss_rpn_bbox": 0.02829, "loss_cls": 0.14159, "acc": 94.66814, "loss_bbox": 0.20656, "loss_mask": 0.19068, "loss": 0.58446, "time": 1.78839}
|
| 158 |
+
{"mode": "train", "epoch": 11, "iter": 7000, "lr": 0.0, "memory": 16625, "data_time": 0.01069, "loss_rpn_cls": 0.01749, "loss_rpn_bbox": 0.02833, "loss_cls": 0.14147, "acc": 94.6804, "loss_bbox": 0.20589, "loss_mask": 0.1915, "loss": 0.58468, "time": 1.79673}
|
| 159 |
+
{"mode": "train", "epoch": 11, "iter": 8000, "lr": 0.0, "memory": 16625, "data_time": 0.01049, "loss_rpn_cls": 0.01732, "loss_rpn_bbox": 0.02826, "loss_cls": 0.14068, "acc": 94.72148, "loss_bbox": 0.20756, "loss_mask": 0.192, "loss": 0.58581, "time": 1.81192}
|
| 160 |
+
{"mode": "train", "epoch": 11, "iter": 9000, "lr": 0.0, "memory": 16625, "data_time": 0.01056, "loss_rpn_cls": 0.0173, "loss_rpn_bbox": 0.0281, "loss_cls": 0.14219, "acc": 94.64839, "loss_bbox": 0.20634, "loss_mask": 0.19087, "loss": 0.5848, "time": 1.79641}
|
| 161 |
+
{"mode": "train", "epoch": 11, "iter": 10000, "lr": 0.0, "memory": 16625, "data_time": 0.01011, "loss_rpn_cls": 0.01698, "loss_rpn_bbox": 0.02776, "loss_cls": 0.13849, "acc": 94.80034, "loss_bbox": 0.20236, "loss_mask": 0.18946, "loss": 0.57505, "time": 1.79276}
|
| 162 |
+
{"mode": "train", "epoch": 11, "iter": 11000, "lr": 0.0, "memory": 16625, "data_time": 0.01041, "loss_rpn_cls": 0.01724, "loss_rpn_bbox": 0.02784, "loss_cls": 0.13983, "acc": 94.7575, "loss_bbox": 0.20524, "loss_mask": 0.19099, "loss": 0.58115, "time": 1.80258}
|
| 163 |
+
{"mode": "train", "epoch": 11, "iter": 12000, "lr": 0.0, "memory": 16625, "data_time": 0.01052, "loss_rpn_cls": 0.01787, "loss_rpn_bbox": 0.0287, "loss_cls": 0.14023, "acc": 94.71614, "loss_bbox": 0.20637, "loss_mask": 0.19275, "loss": 0.58592, "time": 1.80272}
|
| 164 |
+
{"mode": "train", "epoch": 11, "iter": 13000, "lr": 0.0, "memory": 16625, "data_time": 0.01032, "loss_rpn_cls": 0.01787, "loss_rpn_bbox": 0.02873, "loss_cls": 0.14107, "acc": 94.69509, "loss_bbox": 0.20613, "loss_mask": 0.1894, "loss": 0.58321, "time": 1.78746}
|
| 165 |
+
{"mode": "train", "epoch": 11, "iter": 14000, "lr": 0.0, "memory": 16625, "data_time": 0.01025, "loss_rpn_cls": 0.01716, "loss_rpn_bbox": 0.02818, "loss_cls": 0.14123, "acc": 94.71084, "loss_bbox": 0.20644, "loss_mask": 0.19062, "loss": 0.58362, "time": 1.78706}
|
| 166 |
+
{"mode": "val", "epoch": 11, "iter": 625, "lr": 0.0, "bbox_mAP": 0.54, "bbox_mAP_50": 0.756, "bbox_mAP_75": 0.589, "bbox_mAP_s": 0.366, "bbox_mAP_m": 0.586, "bbox_mAP_l": 0.697, "bbox_mAP_copypaste": "0.540 0.756 0.589 0.366 0.586 0.697", "segm_mAP": 0.467, "segm_mAP_50": 0.722, "segm_mAP_75": 0.503, "segm_mAP_s": 0.268, "segm_mAP_m": 0.502, "segm_mAP_l": 0.66, "segm_mAP_copypaste": "0.467 0.722 0.503 0.268 0.502 0.660"}
|
| 167 |
+
{"mode": "train", "epoch": 12, "iter": 1000, "lr": 0.0, "memory": 16625, "data_time": 0.01369, "loss_rpn_cls": 0.01732, "loss_rpn_bbox": 0.02813, "loss_cls": 0.13904, "acc": 94.75728, "loss_bbox": 0.20493, "loss_mask": 0.18991, "loss": 0.57932, "time": 1.79737}
|
| 168 |
+
{"mode": "train", "epoch": 12, "iter": 2000, "lr": 0.0, "memory": 16625, "data_time": 0.01014, "loss_rpn_cls": 0.01721, "loss_rpn_bbox": 0.02756, "loss_cls": 0.13947, "acc": 94.75864, "loss_bbox": 0.20409, "loss_mask": 0.18844, "loss": 0.57677, "time": 1.78906}
|
| 169 |
+
{"mode": "train", "epoch": 12, "iter": 3000, "lr": 0.0, "memory": 16625, "data_time": 0.01027, "loss_rpn_cls": 0.01673, "loss_rpn_bbox": 0.02772, "loss_cls": 0.13891, "acc": 94.77673, "loss_bbox": 0.20352, "loss_mask": 0.18933, "loss": 0.57621, "time": 1.80055}
|
| 170 |
+
{"mode": "train", "epoch": 12, "iter": 4000, "lr": 0.0, "memory": 16625, "data_time": 0.01045, "loss_rpn_cls": 0.01705, "loss_rpn_bbox": 0.02791, "loss_cls": 0.13923, "acc": 94.76326, "loss_bbox": 0.20453, "loss_mask": 0.18952, "loss": 0.57825, "time": 1.79384}
|
| 171 |
+
{"mode": "train", "epoch": 12, "iter": 5000, "lr": 0.0, "memory": 16625, "data_time": 0.01025, "loss_rpn_cls": 0.01716, "loss_rpn_bbox": 0.02778, "loss_cls": 0.13973, "acc": 94.71843, "loss_bbox": 0.20478, "loss_mask": 0.18963, "loss": 0.57908, "time": 1.79713}
|
| 172 |
+
{"mode": "train", "epoch": 12, "iter": 6000, "lr": 0.0, "memory": 16625, "data_time": 0.01048, "loss_rpn_cls": 0.017, "loss_rpn_bbox": 0.02771, "loss_cls": 0.13746, "acc": 94.84778, "loss_bbox": 0.20193, "loss_mask": 0.18915, "loss": 0.57325, "time": 1.79234}
|
| 173 |
+
{"mode": "train", "epoch": 12, "iter": 7000, "lr": 0.0, "memory": 16625, "data_time": 0.01034, "loss_rpn_cls": 0.01724, "loss_rpn_bbox": 0.02844, "loss_cls": 0.14264, "acc": 94.64568, "loss_bbox": 0.20816, "loss_mask": 0.19147, "loss": 0.58795, "time": 1.78898}
|
| 174 |
+
{"mode": "train", "epoch": 12, "iter": 8000, "lr": 0.0, "memory": 16625, "data_time": 0.01049, "loss_rpn_cls": 0.01736, "loss_rpn_bbox": 0.02824, "loss_cls": 0.13858, "acc": 94.73723, "loss_bbox": 0.20594, "loss_mask": 0.18943, "loss": 0.57955, "time": 1.79788}
|
| 175 |
+
{"mode": "train", "epoch": 12, "iter": 9000, "lr": 0.0, "memory": 16625, "data_time": 0.01006, "loss_rpn_cls": 0.01765, "loss_rpn_bbox": 0.02855, "loss_cls": 0.13988, "acc": 94.73291, "loss_bbox": 0.20561, "loss_mask": 0.19054, "loss": 0.58222, "time": 1.80005}
|
| 176 |
+
{"mode": "train", "epoch": 12, "iter": 10000, "lr": 0.0, "memory": 16625, "data_time": 0.01012, "loss_rpn_cls": 0.01705, "loss_rpn_bbox": 0.02805, "loss_cls": 0.13892, "acc": 94.78455, "loss_bbox": 0.20484, "loss_mask": 0.19147, "loss": 0.58032, "time": 1.77916}
|
| 177 |
+
{"mode": "train", "epoch": 12, "iter": 11000, "lr": 0.0, "memory": 16625, "data_time": 0.01045, "loss_rpn_cls": 0.01714, "loss_rpn_bbox": 0.02845, "loss_cls": 0.13857, "acc": 94.78147, "loss_bbox": 0.20448, "loss_mask": 0.19007, "loss": 0.57872, "time": 1.79599}
|
| 178 |
+
{"mode": "train", "epoch": 12, "iter": 12000, "lr": 0.0, "memory": 16625, "data_time": 0.01007, "loss_rpn_cls": 0.01661, "loss_rpn_bbox": 0.02752, "loss_cls": 0.13574, "acc": 94.86975, "loss_bbox": 0.20109, "loss_mask": 0.18848, "loss": 0.56944, "time": 1.78675}
|
| 179 |
+
{"mode": "train", "epoch": 12, "iter": 13000, "lr": 0.0, "memory": 16625, "data_time": 0.01014, "loss_rpn_cls": 0.01736, "loss_rpn_bbox": 0.02831, "loss_cls": 0.13944, "acc": 94.74512, "loss_bbox": 0.20594, "loss_mask": 0.1902, "loss": 0.58125, "time": 1.79183}
|
| 180 |
+
{"mode": "train", "epoch": 12, "iter": 14000, "lr": 0.0, "memory": 16625, "data_time": 0.01007, "loss_rpn_cls": 0.01716, "loss_rpn_bbox": 0.0277, "loss_cls": 0.13736, "acc": 94.84421, "loss_bbox": 0.20193, "loss_mask": 0.18799, "loss": 0.57214, "time": 1.7962}
|
| 181 |
+
{"mode": "val", "epoch": 12, "iter": 625, "lr": 0.0, "bbox_mAP": 0.539, "bbox_mAP_50": 0.756, "bbox_mAP_75": 0.59, "bbox_mAP_s": 0.367, "bbox_mAP_m": 0.585, "bbox_mAP_l": 0.696, "bbox_mAP_copypaste": "0.539 0.756 0.590 0.367 0.585 0.696", "segm_mAP": 0.467, "segm_mAP_50": 0.722, "segm_mAP_75": 0.503, "segm_mAP_s": 0.27, "segm_mAP_m": 0.502, "segm_mAP_l": 0.66, "segm_mAP_copypaste": "0.467 0.722 0.503 0.270 0.502 0.660"}
|
ckpts/vitp_coco_maskrcnn_539/epoch_12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bef9ed0f74b1774a13f8c35fa76cf121563c6422f819c40b07675ca4cdc12acc
|
| 3 |
+
size 4320110425
|
ckpts/vitp_coco_maskrcnn_539/vitp_coco_maskrcnn_bs8_lr3e-5_dpr03_ld_60.py
ADDED
|
@@ -0,0 +1,357 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_type = 'CocoDataset'
|
| 2 |
+
data_root = '/home/share/coco/'
|
| 3 |
+
img_norm_cfg = dict(
|
| 4 |
+
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
| 5 |
+
train_pipeline = [
|
| 6 |
+
dict(type='LoadImageFromFile'),
|
| 7 |
+
dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
|
| 8 |
+
dict(type='RandomFlip', flip_ratio=0.5),
|
| 9 |
+
dict(
|
| 10 |
+
type='AutoAugment',
|
| 11 |
+
policies=[[{
|
| 12 |
+
'type':
|
| 13 |
+
'Resize',
|
| 14 |
+
'img_scale': [(480, 1333), (512, 1333), (544, 1333), (576, 1333),
|
| 15 |
+
(608, 1333), (640, 1333), (672, 1333), (704, 1333),
|
| 16 |
+
(736, 1333), (768, 1333), (800, 1333)],
|
| 17 |
+
'multiscale_mode':
|
| 18 |
+
'value',
|
| 19 |
+
'keep_ratio':
|
| 20 |
+
True
|
| 21 |
+
}],
|
| 22 |
+
[{
|
| 23 |
+
'type': 'Resize',
|
| 24 |
+
'img_scale': [(400, 1333), (500, 1333), (600, 1333)],
|
| 25 |
+
'multiscale_mode': 'value',
|
| 26 |
+
'keep_ratio': True
|
| 27 |
+
}, {
|
| 28 |
+
'type': 'RandomCrop',
|
| 29 |
+
'crop_type': 'absolute_range',
|
| 30 |
+
'crop_size': (384, 600),
|
| 31 |
+
'allow_negative_crop': True
|
| 32 |
+
}, {
|
| 33 |
+
'type':
|
| 34 |
+
'Resize',
|
| 35 |
+
'img_scale': [(480, 1333), (512, 1333), (544, 1333),
|
| 36 |
+
(576, 1333), (608, 1333), (640, 1333),
|
| 37 |
+
(672, 1333), (704, 1333), (736, 1333),
|
| 38 |
+
(768, 1333), (800, 1333)],
|
| 39 |
+
'multiscale_mode':
|
| 40 |
+
'value',
|
| 41 |
+
'override':
|
| 42 |
+
True,
|
| 43 |
+
'keep_ratio':
|
| 44 |
+
True
|
| 45 |
+
}]]),
|
| 46 |
+
dict(
|
| 47 |
+
type='Normalize',
|
| 48 |
+
mean=[123.675, 116.28, 103.53],
|
| 49 |
+
std=[58.395, 57.12, 57.375],
|
| 50 |
+
to_rgb=True),
|
| 51 |
+
dict(type='Pad', size_divisor=32),
|
| 52 |
+
dict(type='DefaultFormatBundle'),
|
| 53 |
+
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks'])
|
| 54 |
+
]
|
| 55 |
+
test_pipeline = [
|
| 56 |
+
dict(type='LoadImageFromFile'),
|
| 57 |
+
dict(
|
| 58 |
+
type='MultiScaleFlipAug',
|
| 59 |
+
img_scale=(1333, 800),
|
| 60 |
+
flip=False,
|
| 61 |
+
transforms=[
|
| 62 |
+
dict(type='Resize', keep_ratio=True),
|
| 63 |
+
dict(type='RandomFlip'),
|
| 64 |
+
dict(
|
| 65 |
+
type='Normalize',
|
| 66 |
+
mean=[123.675, 116.28, 103.53],
|
| 67 |
+
std=[58.395, 57.12, 57.375],
|
| 68 |
+
to_rgb=True),
|
| 69 |
+
dict(type='Pad', size_divisor=32),
|
| 70 |
+
dict(type='ImageToTensor', keys=['img']),
|
| 71 |
+
dict(type='Collect', keys=['img'])
|
| 72 |
+
])
|
| 73 |
+
]
|
| 74 |
+
data = dict(
|
| 75 |
+
samples_per_gpu=1,
|
| 76 |
+
workers_per_gpu=1,
|
| 77 |
+
train=dict(
|
| 78 |
+
type='CocoDataset',
|
| 79 |
+
ann_file='/home/share/coco/annotations/instances_train2017.json',
|
| 80 |
+
img_prefix='/home/share/coco/train2017/',
|
| 81 |
+
pipeline=[
|
| 82 |
+
dict(type='LoadImageFromFile'),
|
| 83 |
+
dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
|
| 84 |
+
dict(type='RandomFlip', flip_ratio=0.5),
|
| 85 |
+
dict(
|
| 86 |
+
type='AutoAugment',
|
| 87 |
+
policies=[[{
|
| 88 |
+
'type':
|
| 89 |
+
'Resize',
|
| 90 |
+
'img_scale': [(480, 1333), (512, 1333), (544, 1333),
|
| 91 |
+
(576, 1333), (608, 1333), (640, 1333),
|
| 92 |
+
(672, 1333), (704, 1333), (736, 1333),
|
| 93 |
+
(768, 1333), (800, 1333)],
|
| 94 |
+
'multiscale_mode':
|
| 95 |
+
'value',
|
| 96 |
+
'keep_ratio':
|
| 97 |
+
True
|
| 98 |
+
}],
|
| 99 |
+
[{
|
| 100 |
+
'type': 'Resize',
|
| 101 |
+
'img_scale': [(400, 1333), (500, 1333),
|
| 102 |
+
(600, 1333)],
|
| 103 |
+
'multiscale_mode': 'value',
|
| 104 |
+
'keep_ratio': True
|
| 105 |
+
}, {
|
| 106 |
+
'type': 'RandomCrop',
|
| 107 |
+
'crop_type': 'absolute_range',
|
| 108 |
+
'crop_size': (384, 600),
|
| 109 |
+
'allow_negative_crop': True
|
| 110 |
+
}, {
|
| 111 |
+
'type':
|
| 112 |
+
'Resize',
|
| 113 |
+
'img_scale': [(480, 1333), (512, 1333),
|
| 114 |
+
(544, 1333), (576, 1333),
|
| 115 |
+
(608, 1333), (640, 1333),
|
| 116 |
+
(672, 1333), (704, 1333),
|
| 117 |
+
(736, 1333), (768, 1333),
|
| 118 |
+
(800, 1333)],
|
| 119 |
+
'multiscale_mode':
|
| 120 |
+
'value',
|
| 121 |
+
'override':
|
| 122 |
+
True,
|
| 123 |
+
'keep_ratio':
|
| 124 |
+
True
|
| 125 |
+
}]]),
|
| 126 |
+
dict(
|
| 127 |
+
type='Normalize',
|
| 128 |
+
mean=[123.675, 116.28, 103.53],
|
| 129 |
+
std=[58.395, 57.12, 57.375],
|
| 130 |
+
to_rgb=True),
|
| 131 |
+
dict(type='Pad', size_divisor=32),
|
| 132 |
+
dict(type='DefaultFormatBundle'),
|
| 133 |
+
dict(
|
| 134 |
+
type='Collect',
|
| 135 |
+
keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks'])
|
| 136 |
+
]),
|
| 137 |
+
val=dict(
|
| 138 |
+
type='CocoDataset',
|
| 139 |
+
ann_file='/home/share/coco/annotations/instances_val2017.json',
|
| 140 |
+
img_prefix='/home/share/coco/val2017/',
|
| 141 |
+
pipeline=[
|
| 142 |
+
dict(type='LoadImageFromFile'),
|
| 143 |
+
dict(
|
| 144 |
+
type='MultiScaleFlipAug',
|
| 145 |
+
img_scale=(1333, 800),
|
| 146 |
+
flip=False,
|
| 147 |
+
transforms=[
|
| 148 |
+
dict(type='Resize', keep_ratio=True),
|
| 149 |
+
dict(type='RandomFlip'),
|
| 150 |
+
dict(
|
| 151 |
+
type='Normalize',
|
| 152 |
+
mean=[123.675, 116.28, 103.53],
|
| 153 |
+
std=[58.395, 57.12, 57.375],
|
| 154 |
+
to_rgb=True),
|
| 155 |
+
dict(type='Pad', size_divisor=32),
|
| 156 |
+
dict(type='ImageToTensor', keys=['img']),
|
| 157 |
+
dict(type='Collect', keys=['img'])
|
| 158 |
+
])
|
| 159 |
+
]),
|
| 160 |
+
test=dict(
|
| 161 |
+
type='CocoDataset',
|
| 162 |
+
ann_file='/home/share/coco/annotations/instances_val2017.json',
|
| 163 |
+
img_prefix='/home/share/coco/val2017/',
|
| 164 |
+
pipeline=[
|
| 165 |
+
dict(type='LoadImageFromFile'),
|
| 166 |
+
dict(
|
| 167 |
+
type='MultiScaleFlipAug',
|
| 168 |
+
img_scale=(1333, 800),
|
| 169 |
+
flip=False,
|
| 170 |
+
transforms=[
|
| 171 |
+
dict(type='Resize', keep_ratio=True),
|
| 172 |
+
dict(type='RandomFlip'),
|
| 173 |
+
dict(
|
| 174 |
+
type='Normalize',
|
| 175 |
+
mean=[123.675, 116.28, 103.53],
|
| 176 |
+
std=[58.395, 57.12, 57.375],
|
| 177 |
+
to_rgb=True),
|
| 178 |
+
dict(type='Pad', size_divisor=32),
|
| 179 |
+
dict(type='ImageToTensor', keys=['img']),
|
| 180 |
+
dict(type='Collect', keys=['img'])
|
| 181 |
+
])
|
| 182 |
+
]))
|
| 183 |
+
evaluation = dict(metric=['bbox', 'segm'], interval=1, classwise=True)
|
| 184 |
+
model = dict(
|
| 185 |
+
type='MaskRCNN',
|
| 186 |
+
backbone=dict(
|
| 187 |
+
type='InternViTAdapter',
|
| 188 |
+
pretrain_size=448,
|
| 189 |
+
img_size=(448, 448),
|
| 190 |
+
patch_size=16,
|
| 191 |
+
embed_dim=1024,
|
| 192 |
+
depth=24,
|
| 193 |
+
num_heads=16,
|
| 194 |
+
mlp_ratio=4.0,
|
| 195 |
+
drop_path_rate=0.2,
|
| 196 |
+
init_values=1e-05,
|
| 197 |
+
with_cp=True,
|
| 198 |
+
use_flash_attn=False,
|
| 199 |
+
qk_normalization=False,
|
| 200 |
+
layerscale_force_fp32=False,
|
| 201 |
+
with_fpn=False,
|
| 202 |
+
freeze_vit=False,
|
| 203 |
+
use_final_norm=True,
|
| 204 |
+
interaction_indexes=[[0, 7], [8, 11], [12, 15], [16, 23]],
|
| 205 |
+
cffn_ratio=0.25,
|
| 206 |
+
deform_ratio=0.25,
|
| 207 |
+
qkv_bias=True,
|
| 208 |
+
norm_type='layer_norm',
|
| 209 |
+
pretrained=
|
| 210 |
+
'/home/u1120230285/lyx/InternVL/internvl_chat/work_dirs/ft_full_1b_16ksteps_instruct_tuning_as_pretrain_TMAug75_general/ViTP_general_16k/ViTP_general_16k.safetensors',
|
| 211 |
+
pretrained_type='full',
|
| 212 |
+
only_feat_out=True),
|
| 213 |
+
neck=dict(
|
| 214 |
+
type='FPN',
|
| 215 |
+
in_channels=[1024, 1024, 1024, 1024],
|
| 216 |
+
out_channels=256,
|
| 217 |
+
num_outs=5),
|
| 218 |
+
rpn_head=dict(
|
| 219 |
+
type='RPNHead',
|
| 220 |
+
in_channels=256,
|
| 221 |
+
feat_channels=256,
|
| 222 |
+
anchor_generator=dict(
|
| 223 |
+
type='AnchorGenerator',
|
| 224 |
+
scales=[8],
|
| 225 |
+
ratios=[0.5, 1.0, 2.0],
|
| 226 |
+
strides=[4, 8, 16, 32, 64]),
|
| 227 |
+
bbox_coder=dict(
|
| 228 |
+
type='DeltaXYWHBBoxCoder',
|
| 229 |
+
target_means=[0.0, 0.0, 0.0, 0.0],
|
| 230 |
+
target_stds=[1.0, 1.0, 1.0, 1.0]),
|
| 231 |
+
loss_cls=dict(
|
| 232 |
+
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
|
| 233 |
+
loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
|
| 234 |
+
roi_head=dict(
|
| 235 |
+
type='StandardRoIHead',
|
| 236 |
+
bbox_roi_extractor=dict(
|
| 237 |
+
type='SingleRoIExtractor',
|
| 238 |
+
roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
|
| 239 |
+
out_channels=256,
|
| 240 |
+
featmap_strides=[4, 8, 16, 32]),
|
| 241 |
+
bbox_head=dict(
|
| 242 |
+
type='ConvFCBBoxHead',
|
| 243 |
+
num_shared_convs=4,
|
| 244 |
+
num_shared_fcs=1,
|
| 245 |
+
in_channels=256,
|
| 246 |
+
conv_out_channels=256,
|
| 247 |
+
fc_out_channels=1024,
|
| 248 |
+
roi_feat_size=7,
|
| 249 |
+
num_classes=80,
|
| 250 |
+
bbox_coder=dict(
|
| 251 |
+
type='DeltaXYWHBBoxCoder',
|
| 252 |
+
target_means=[0.0, 0.0, 0.0, 0.0],
|
| 253 |
+
target_stds=[0.1, 0.1, 0.2, 0.2]),
|
| 254 |
+
reg_class_agnostic=False,
|
| 255 |
+
reg_decoded_bbox=True,
|
| 256 |
+
norm_cfg=dict(type='SyncBN', requires_grad=True),
|
| 257 |
+
loss_cls=dict(
|
| 258 |
+
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
|
| 259 |
+
loss_bbox=dict(type='GIoULoss', loss_weight=10.0)),
|
| 260 |
+
mask_roi_extractor=dict(
|
| 261 |
+
type='SingleRoIExtractor',
|
| 262 |
+
roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
|
| 263 |
+
out_channels=256,
|
| 264 |
+
featmap_strides=[4, 8, 16, 32]),
|
| 265 |
+
mask_head=dict(
|
| 266 |
+
type='FCNMaskHead',
|
| 267 |
+
num_convs=4,
|
| 268 |
+
in_channels=256,
|
| 269 |
+
conv_out_channels=256,
|
| 270 |
+
num_classes=80,
|
| 271 |
+
loss_mask=dict(
|
| 272 |
+
type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
|
| 273 |
+
train_cfg=dict(
|
| 274 |
+
rpn=dict(
|
| 275 |
+
assigner=dict(
|
| 276 |
+
type='MaxIoUAssigner',
|
| 277 |
+
pos_iou_thr=0.7,
|
| 278 |
+
neg_iou_thr=0.3,
|
| 279 |
+
min_pos_iou=0.3,
|
| 280 |
+
match_low_quality=True,
|
| 281 |
+
gpu_assign_thr=300,
|
| 282 |
+
ignore_iof_thr=-1),
|
| 283 |
+
sampler=dict(
|
| 284 |
+
type='RandomSampler',
|
| 285 |
+
num=256,
|
| 286 |
+
pos_fraction=0.5,
|
| 287 |
+
neg_pos_ub=-1,
|
| 288 |
+
add_gt_as_proposals=False),
|
| 289 |
+
allowed_border=-1,
|
| 290 |
+
pos_weight=-1,
|
| 291 |
+
debug=False),
|
| 292 |
+
rpn_proposal=dict(
|
| 293 |
+
nms_pre=2000,
|
| 294 |
+
max_per_img=1000,
|
| 295 |
+
nms=dict(type='nms', iou_threshold=0.7),
|
| 296 |
+
min_bbox_size=0),
|
| 297 |
+
rcnn=dict(
|
| 298 |
+
assigner=dict(
|
| 299 |
+
type='MaxIoUAssigner',
|
| 300 |
+
pos_iou_thr=0.5,
|
| 301 |
+
neg_iou_thr=0.5,
|
| 302 |
+
min_pos_iou=0.5,
|
| 303 |
+
match_low_quality=True,
|
| 304 |
+
gpu_assign_thr=300,
|
| 305 |
+
ignore_iof_thr=-1),
|
| 306 |
+
sampler=dict(
|
| 307 |
+
type='RandomSampler',
|
| 308 |
+
num=512,
|
| 309 |
+
pos_fraction=0.25,
|
| 310 |
+
neg_pos_ub=-1,
|
| 311 |
+
add_gt_as_proposals=True),
|
| 312 |
+
mask_size=28,
|
| 313 |
+
pos_weight=-1,
|
| 314 |
+
debug=False)),
|
| 315 |
+
test_cfg=dict(
|
| 316 |
+
rpn=dict(
|
| 317 |
+
nms_across_levels=False,
|
| 318 |
+
nms_pre=1000,
|
| 319 |
+
max_per_img=1000,
|
| 320 |
+
nms_post=1000,
|
| 321 |
+
nms=dict(type='nms', iou_threshold=0.7),
|
| 322 |
+
min_bbox_size=0),
|
| 323 |
+
rcnn=dict(
|
| 324 |
+
score_thr=0.05,
|
| 325 |
+
nms=dict(type='nms', iou_threshold=0.5),
|
| 326 |
+
max_per_img=100,
|
| 327 |
+
mask_thr_binary=0.5)))
|
| 328 |
+
optimizer_config = dict(grad_clip=None)
|
| 329 |
+
runner = dict(type='EpochBasedRunner', max_epochs=12)
|
| 330 |
+
optimizer = dict(
|
| 331 |
+
type='AdamW',
|
| 332 |
+
lr=3e-05,
|
| 333 |
+
betas=(0.9, 0.999),
|
| 334 |
+
weight_decay=0.05,
|
| 335 |
+
constructor='InternViTAdapterLayerDecayOptimizerConstructor',
|
| 336 |
+
paramwise_cfg=dict(num_layers=24, layer_decay_rate=0.85))
|
| 337 |
+
lr_config = dict(
|
| 338 |
+
policy='step',
|
| 339 |
+
warmup='linear',
|
| 340 |
+
warmup_iters=500,
|
| 341 |
+
warmup_ratio=0.001,
|
| 342 |
+
step=[8, 11])
|
| 343 |
+
checkpoint_config = dict(interval=1)
|
| 344 |
+
log_config = dict(interval=1000, hooks=[dict(type='TextLoggerHook')])
|
| 345 |
+
custom_hooks = [dict(type='NumClassCheckHook')]
|
| 346 |
+
dist_params = dict(backend='nccl')
|
| 347 |
+
log_level = 'INFO'
|
| 348 |
+
load_from = None
|
| 349 |
+
resume_from = None
|
| 350 |
+
workflow = [('train', 1)]
|
| 351 |
+
opencv_num_threads = 0
|
| 352 |
+
mp_start_method = 'fork'
|
| 353 |
+
auto_scale_lr = dict(enable=False, base_batch_size=16)
|
| 354 |
+
auto_resume = False
|
| 355 |
+
gpu_ids = range(0, 8)
|
| 356 |
+
device = 'cuda'
|
| 357 |
+
work_dir = './work_dirs/vitp_coco_maskrcnn_bs8_lr3e-5_dpr03_ld_60'
|